{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pysam\n",
    "import numpy as np\n",
    "from deepsignal3.utils.process_utils import complement_seq\n",
    "from deepsignal3.utils.ref_reader import get_contig2len,get_contig2len_n_seq\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "reference_path='/home/xiaoyifu/data/reference/chm13v2.0.fa'\n",
    "chrom2len,contigs = get_contig2len_n_seq(reference_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_q2tloc_from_cigar(r_cigar_tuple, strand, seq_len):\n",
    "    \"\"\"\n",
    "    insertion: -1, deletion: -2, mismatch: -3\n",
    "    :param r_cigar_tuple: pysam.alignmentSegment.cigartuples\n",
    "    :param strand: 1/-1 for fwd/rev\n",
    "    :param seq_len: read alignment length\n",
    "    :return: query pos to ref pos\n",
    "    \"\"\"\n",
    "    fill_invalid = -2\n",
    "    # get each base calls genomic position\n",
    "    q_to_r_poss = np.full(seq_len + 1, fill_invalid, dtype=np.int32)\n",
    "    # process cigar ops in read direction\n",
    "    curr_r_pos, curr_q_pos = 0, 0\n",
    "    cigar_ops = r_cigar_tuple if strand == 1 else r_cigar_tuple[::-1]\n",
    "    for op, op_len in cigar_ops:\n",
    "        if op == 1:\n",
    "            # inserted bases into ref\n",
    "            for q_pos in range(curr_q_pos, curr_q_pos + op_len):\n",
    "                q_to_r_poss[q_pos] = -1\n",
    "            curr_q_pos += op_len\n",
    "        elif op in (2, 3):\n",
    "            # deleted ref bases\n",
    "            curr_r_pos += op_len\n",
    "        elif op in (0, 7, 8):\n",
    "            # aligned bases\n",
    "            for op_offset in range(op_len):\n",
    "                q_to_r_poss[curr_q_pos + op_offset] = curr_r_pos + op_offset\n",
    "            curr_q_pos += op_len\n",
    "            curr_r_pos += op_len\n",
    "        elif op == 6:\n",
    "            # padding (shouldn't happen in mappy)\n",
    "            pass\n",
    "    q_to_r_poss[curr_q_pos] = curr_r_pos\n",
    "    if q_to_r_poss[-1] == fill_invalid:\n",
    "        raise ValueError(\n",
    "            (\n",
    "                \"Invalid cigar string encountered. Reference length: {}  Cigar \"\n",
    "                + \"implied reference length: {}\"\n",
    "            ).format(seq_len, curr_r_pos)\n",
    "        )\n",
    "    return q_to_r_poss\n",
    "\n",
    "def get_refloc_of_methysite_in_motif(seqstr, motifset, methyloc_in_motif=0):\n",
    "    \"\"\"\n",
    "\n",
    "    :param seqstr:\n",
    "    :param motifset:\n",
    "    :param methyloc_in_motif: 0-based\n",
    "    :return:\n",
    "    \"\"\"\n",
    "    motifset = set(motifset)\n",
    "    strlen = len(seqstr)\n",
    "    motiflen = len(list(motifset)[0])\n",
    "    sites = []\n",
    "    for i in range(0, strlen - motiflen + 1):\n",
    "        if seqstr[i : i + motiflen] in motifset:\n",
    "            sites.append(i + methyloc_in_motif)\n",
    "    return sites"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------\n",
      "______________________________\n"
     ]
    }
   ],
   "source": [
    "bamfile = pysam.AlignmentFile('/home/xiaoyifu/data/HG002/R9.4/part1/reverse_strand.bam', \"rb\", check_sq=False)\n",
    "bam_index=pysam.IndexedReads(bamfile)\n",
    "bam_index.build()\n",
    "read_iter=bam_index.find('ffb5ba94-60fa-42a5-b1dd-0f146a2a3e3f')\n",
    "\n",
    "for bam_read in read_iter:\n",
    "    dorado_pred = []\n",
    "    query_pos=[]\n",
    "    ref_readlocs=dict()\n",
    "    test_q_pos=[]\n",
    "    dorado_pred_pos=[]\n",
    "    dorado_pred_pos_c=[]\n",
    "    test_q_pos_c=[]\n",
    "    print('-----------')\n",
    "    reference_name=bam_read.reference_name\n",
    "    strand_code = -1 if bam_read.is_reverse else 1\n",
    "                \n",
    "    strand = \"-\" if bam_read.is_reverse else \"+\"\n",
    "    seq=bam_read.get_forward_sequence() #if  bam_read.get_forward_sequence() is not None else seq      \n",
    "    q_seq=bam_read.query_sequence #if bam_read.query_sequence is not None else q_seq\n",
    "    rseq=bam_read.get_reference_sequence()\n",
    "    rseq_complement=complement_seq(rseq)\n",
    "    q_seq_complement=complement_seq(q_seq)\n",
    "    #find_key=(read_name,reference_name)\n",
    "    ref_start=bam_read.reference_start\n",
    "    ref_end = bam_read.reference_end\n",
    "    cigar_tuples = bam_read.cigartuples\n",
    "    qalign_start = bam_read.query_alignment_start\n",
    "    qalign_end = bam_read.query_alignment_end\n",
    "    if bam_read.is_reverse:\n",
    "        seq_start = len(seq) - qalign_end\n",
    "        seq_end = len(seq) - qalign_start\n",
    "    else:\n",
    "        seq_start = qalign_start\n",
    "        seq_end = qalign_end\n",
    "    q_to_r_poss = get_q2tloc_from_cigar(\n",
    "        cigar_tuples, strand_code, (seq_end - seq_start)\n",
    "    )\n",
    "    # print(reference_name)\n",
    "    # print(ref_start)\n",
    "    # print(ref_end)\n",
    "    # print('query_length:')\n",
    "    # print(bam_read.query_length)\n",
    "    # print('query_alignment_end-query_alignment_start:')\n",
    "    # print(bam_read.query_alignment_end-bam_read.query_alignment_start)\n",
    "    # print('is_reverse:')\n",
    "    # print(bam_read.is_reverse)\n",
    "    \n",
    "    # print('origin seq:')\n",
    "    # print(seq)\n",
    "    # print('query seq:')\n",
    "    # print(q_seq)\n",
    "    # print('ref seq:')\n",
    "    # print(rseq)\n",
    "    # print('ref seq complement:')\n",
    "    # print(rseq_complement)\n",
    "    #contigs_complement=complement_seq(contigs[reference_name])\n",
    "    #print('8:')\n",
    "    #print(chrom2len[reference_name])\n",
    "    for read_pos,ref_pos in bam_read.get_aligned_pairs(matches_only=True):\n",
    "        ref_readlocs[ref_pos]=read_pos\n",
    "    ref_poss=[]\n",
    "    if bam_read.modified_bases != None:\n",
    "        ref_loc = bam_read.get_reference_positions(full_length=True)\n",
    "        for m, locs in bam_read.modified_bases_forward.items():\n",
    "            if m[0] == 'C' and m[2] == 'm':\n",
    "                for lc in locs:\n",
    "                    if ref_loc[lc[0]] != None:\n",
    "                        query_pos.append(lc[0] )  #if bam_read.is_forward else (len(seq)-lc[0]-1)\n",
    "                        rloc_c=ref_loc[lc[0]]\n",
    "                        rloc = ref_loc[lc[0]]   if bam_read.is_forward else ref_loc[len(seq)-lc[0]-1]\n",
    "                        \n",
    "                        if rloc !=None:\n",
    "                            test_q_pos.append(len(seq)-ref_readlocs[rloc]-1)\n",
    "                            test_q_pos_c.append(ref_readlocs[rloc])\n",
    "                        dorado_pred.append(lc[1])\n",
    "                        dorado_pred_pos_c.append(rloc_c)\n",
    "                        dorado_pred_pos.append(rloc)\n",
    "                        if seq_start<=lc[0]< seq_end:\n",
    "                            if q_to_r_poss[lc[0]-seq_start] != -1:\n",
    "                                if strand == \"-\":\n",
    "                                    # pos = '.'#loc_in_read\n",
    "                                    ref_pos = ref_end - 1 - q_to_r_poss[lc[0]-seq_start]\n",
    "                                    ref_poss.append(ref_pos)\n",
    "    # motif_seqs='CG'\n",
    "    # methyloc=0\n",
    "    # tsite_locs = get_refloc_of_methysite_in_motif(seq, set(motif_seqs), methyloc)\n",
    "    # for loc_in_read in tsite_locs:\n",
    "    #     if seq_start <= loc_in_read < seq_end:\n",
    "    #         offset_idx = loc_in_read - seq_start\n",
    "    #         if q_to_r_poss[offset_idx] != -1:\n",
    "    #             if strand == \"-\":\n",
    "    #                 # pos = '.'#loc_in_read\n",
    "    #                 ref_pos = ref_end - 1 - q_to_r_poss[offset_idx]\n",
    "    #             else:\n",
    "    #                 # pos = loc_in_read\n",
    "    #                 ref_pos = ref_start + q_to_r_poss[offset_idx]\n",
    "    #             ref_readlocs[ref_pos]=loc_in_read\n",
    "    #             ref_poss.append(ref_pos)\n",
    "    # print('-----------')\n",
    "    # print('read pos:')\n",
    "    # print(query_pos)\n",
    "    # print('ref pos:')\n",
    "    # print(dorado_pred_pos)\n",
    "    # print('ref poss:')\n",
    "    # print(ref_poss)\n",
    "    # print('query pos in seq:')\n",
    "    # for i in query_pos:\n",
    "    #     print(seq[i],end='')\n",
    "    # print('\\n')\n",
    "    # print('test_q_pos in seq:')\n",
    "    # for i in test_q_pos:\n",
    "    #     print(seq[i],end='')\n",
    "    # print('\\n')\n",
    "    # print('test_q_pos in q_seq:')\n",
    "    # for i in test_q_pos_c:\n",
    "    #     print(q_seq[i],end='')\n",
    "    # print('\\n')\n",
    "    # print('query_pos in q_seq:')\n",
    "    # for i in query_pos:\n",
    "    #     print(q_seq[i],end='')\n",
    "    # print('\\n')\n",
    "    # for i in query_pos:\n",
    "    #     print(q_seq[i-1],end='')\n",
    "    # print('\\n')\n",
    "    # print('ref_poss in rseq_complement:')\n",
    "    # for i in ref_poss:\n",
    "    #     print(rseq_complement[i-ref_end],end='')\n",
    "    # print('\\n')\n",
    "    # print('ref_poss in rseq_contig complement:')\n",
    "    # for i in ref_poss:\n",
    "    #     print(contigs_complement[i-ref_end],end='')\n",
    "    # print('\\n')\n",
    "    # print('ref_poss in rseq_contig:')\n",
    "    # for i in ref_poss:\n",
    "    #     print(contigs[reference_name][i],end='')\n",
    "    # print('\\n')\n",
    "    # for i in query_pos:\n",
    "    #     print(q_seq_complement[len(q_seq_complement)-i-3],end='')\n",
    "    # print('\\n')\n",
    "    # for i in query_pos:\n",
    "    #     print(q_seq_complement[len(q_seq_complement)-i-2],end='')\n",
    "    # print('\\n')\n",
    "    # for i in query_pos:\n",
    "    #     print(q_seq_complement[len(q_seq_complement)-i-1],end='')\n",
    "    # print('\\n')\n",
    "    \n",
    "    #for i in query_pos:\n",
    "    #    print(q_seq[i+1],end='')\n",
    "    # for i in test_q_pos:\n",
    "    #     print(q_seq[i],end='')\n",
    "    # print('\\n')\n",
    "    # print('test_q_pos')\n",
    "    # for i in test_q_pos:\n",
    "    #     print(q_seq[i-1],end='')\n",
    "    # print('\\n')\n",
    "    # for i in test_q_pos:\n",
    "    #     print(q_seq[i-2],end='')\n",
    "    # print('\\n')\n",
    "    # for i in query_pos:\n",
    "    #     print(seq[i+1],end='')\n",
    "    # print('\\n')\n",
    "    # print('***********')\n",
    "    # for i in dorado_pred_pos:\n",
    "    #     print(rseq_complement[i],end='')\n",
    "    # print('\\n')\n",
    "    #for i in dorado_pred_pos:\n",
    "    #    print(rseq[i-ref_start],end='')\n",
    "    #print('\\n')\n",
    "    #for i in dorado_pred_pos:\n",
    "    #    print(rseq[i+1-ref_start],end='')\n",
    "    #print('\\n')\n",
    "    #for i in dorado_pred_pos:\n",
    "    #    print(rseq[i-ref_end],end='')\n",
    "    #print('\\n')\n",
    "    #for i in dorado_pred_pos:\n",
    "    #    print(rseq[i+1-ref_end],end='')\n",
    "    #print('\\n')\n",
    "    # for i in dorado_pred_pos:\n",
    "    #     print(rseq[chrom2len[reference_name]-i-1-ref_end],end='')\n",
    "    # print('\\n')\n",
    "    # for i in dorado_pred_pos:\n",
    "    #     print(rseq[chrom2len[reference_name]-i-2-ref_end],end='')\n",
    "    # print('\\n')\n",
    "    # for i in dorado_pred_pos:\n",
    "    #     print(rseq[chrom2len[reference_name]-i-1-ref_start],end='')\n",
    "    # print('\\n')\n",
    "    # for i in dorado_pred_pos:\n",
    "    #     print(rseq[chrom2len[reference_name]-i-2-ref_start],end='')\n",
    "    # print('\\n')\n",
    "    # for i in dorado_pred_pos:\n",
    "    #     print(rseq_complement[chrom2len[reference_name]-i-1-ref_end],end='')\n",
    "    # print('\\n')\n",
    "    # for i in dorado_pred_pos:\n",
    "    #     print(rseq_complement[chrom2len[reference_name]-i-2-ref_end],end='')\n",
    "    # print('\\n')\n",
    "    # for i in dorado_pred_pos:\n",
    "    #     print(rseq_complement[chrom2len[reference_name]-i-1-ref_start],end='')\n",
    "    # print('\\n')\n",
    "    # for i in dorado_pred_pos:\n",
    "    #     print(rseq_complement[chrom2len[reference_name]-i-2-ref_start],end='')\n",
    "    # print('\\n')\n",
    "    print('______________________________')\n",
    "    #for i in dorado_pred_pos:\n",
    "    #    print(rseq_complement[i-ref_end],end='')\n",
    "    #print('\\n')\n",
    "    #for i in dorado_pred_pos:\n",
    "    #    print(rseq_complement[i+1-ref_end],end='')\n",
    "    #print('\\n')\n",
    "    #for i in dorado_pred_pos:\n",
    "    #    print(rseq_complement[i-ref_start],end='')\n",
    "    #print('\\n')\n",
    "    #for i in dorado_pred_pos:\n",
    "    #    print(rseq_complement[i+1-ref_start],end='')\n",
    "    #print('\\n')\n",
    "    #for i in dorado_pred_pos:\n",
    "    #    print(contigs[reference_name][i],end='')\n",
    "    #print('\\n')\n",
    "    #for i in dorado_pred_pos:\n",
    "    #    print(contigs[reference_name][i+1],end='')\n",
    "    #print('\\n')\n",
    "    #for i in dorado_pred_pos:\n",
    "    #    print(complement_seq(contigs[reference_name])[i],end='')\n",
    "    #print('\\n')\n",
    "    #for i in dorado_pred_pos:\n",
    "    #    print(complement_seq(contigs[reference_name])[i+1],end='')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "TC\n",
      "\n",
      "AA\n",
      "\n",
      "CA\n",
      "\n",
      "TT\n",
      "\n",
      "TT\n",
      "\n"
     ]
    }
   ],
   "source": [
    "for i in ref_poss:\n",
    "    print(rseq_complement[i-ref_start-1:i+1-ref_start],end='')\n",
    "    print('\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "TC\n",
      "\n",
      "AA\n",
      "\n",
      "CA\n",
      "\n",
      "TT\n",
      "\n",
      "TT\n",
      "\n"
     ]
    }
   ],
   "source": [
    "for i in ref_poss:\n",
    "    print(rseq_complement[i-ref_end-1:i+1-ref_end],end='')\n",
    "    print('\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CG\n",
      "\n",
      "CG\n",
      "\n",
      "CG\n",
      "\n",
      "CG\n",
      "\n",
      "CG\n",
      "\n"
     ]
    }
   ],
   "source": [
    "for i in ref_poss:\n",
    "    print(rseq[i-ref_end-1:i+1-ref_end],end='')\n",
    "    print('\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "first_valid_index = next((i for i, (_, ref_pos) in enumerate(bam_read.get_aligned_pairs()) if ref_pos is not None), len(bam_read.get_aligned_pairs()))\n",
    "last_valid_index = len(bam_read.get_aligned_pairs()) - 1 - next((i for i, (_, ref_pos) in enumerate(reversed(bam_read.get_aligned_pairs())) if ref_pos is not None), len(bam_read.get_aligned_pairs()))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3\n",
      "898\n"
     ]
    }
   ],
   "source": [
    "print(first_valid_index)\n",
    "print(last_valid_index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[(0, None), (1, None), (2, None), (3, 163518767), (4, 163518768)]\n",
      "[(894, 163519651), (895, 163519652), (896, None)]\n"
     ]
    }
   ],
   "source": [
    "print(bam_read.get_aligned_pairs()[:5])\n",
    "print(bam_read.get_aligned_pairs()[897:900])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bam_read.is_reverse"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "pos_pair=[]\n",
    "for read_pos,ref_pos in bam_read.get_aligned_pairs():\n",
    "    # if ref_pos is None:\n",
    "    #     continue\n",
    "    if read_pos is None:\n",
    "        if bam_read.is_reverse:\n",
    "            pos_pair.append((None,ref_end-ref_pos-1))\n",
    "        else:\n",
    "            pos_pair.append((None,ref_pos-ref_start))\n",
    "        continue\n",
    "    if ref_pos is None:\n",
    "        if bam_read.is_reverse:\n",
    "            pos_pair.append((len(seq)-read_pos-1,None))\n",
    "        else:\n",
    "            pos_pair.append((read_pos,None))\n",
    "        continue\n",
    "    if bam_read.is_reverse:\n",
    "        pos_pair.append((len(seq)-read_pos-1,ref_end-ref_pos-1))\n",
    "    else:\n",
    "        pos_pair.append((read_pos,ref_pos-ref_start))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3\n",
      "898\n"
     ]
    }
   ],
   "source": [
    "first_valid_index = next((i for i, (_, ref_pos) in enumerate(pos_pair) if ref_pos is not None), len(pos_pair))\n",
    "last_valid_index = len(pos_pair) - 1 - next((i for i, (_, ref_pos) in enumerate(reversed(pos_pair)) if ref_pos is not None), len(pos_pair))\n",
    "print(first_valid_index)\n",
    "print(last_valid_index)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[(918, None), (917, None), (916, None), (915, 885), (914, 884)]\n",
      "[(24, 1), (23, 0), (22, None)]\n"
     ]
    }
   ],
   "source": [
    "print(pos_pair[:5])\n",
    "print(pos_pair[897:900])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "q_to_r_poss=dict()\n",
    "for read_pos,ref_pos in bam_read.get_aligned_pairs(matches_only=True):\n",
    "    \n",
    "    if bam_read.is_reverse:\n",
    "        q_to_r_poss[(len(seq)-read_pos-1)]=ref_end-ref_pos-1\n",
    "    else:\n",
    "        q_to_r_poss[read_pos]=ref_pos-ref_start"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "163518767\n",
      "163519653\n"
     ]
    }
   ],
   "source": [
    "print(ref_start)\n",
    "print(ref_end)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "from deepsignal3.utils.process_utils import get_motif_seqs\n",
    "\n",
    "motif_seqs = get_motif_seqs('CG', True)\n",
    "tsite_locs = get_refloc_of_methysite_in_motif(\n",
    "                            rseq_complement, set(motif_seqs), 0)\n",
    "ref_readlocs_c = dict()\n",
    "ref_poss_c = []\n",
    "pred_pos_c = []\n",
    "for loc_in_read in tsite_locs:\n",
    "    if strand == \"-\":\n",
    "        ref_pos = ref_end-loc_in_read-2\n",
    "    else:\n",
    "        ref_pos = ref_start+loc_in_read\n",
    "    ref_readlocs_c[loc_in_read] = ref_pos\n",
    "    ref_poss_c.append(ref_pos)\n",
    "    pred_pos_c.append(loc_in_read)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CG\n",
      "\n",
      "CG\n",
      "\n",
      "CG\n",
      "\n",
      "CG\n",
      "\n",
      "CG\n",
      "\n"
     ]
    }
   ],
   "source": [
    "for i in ref_poss_c:\n",
    "    print(contigs[reference_name][i:i+2],end='')\n",
    "    print('\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CA\n",
      "CA\n"
     ]
    }
   ],
   "source": [
    "print(contigs[reference_name][ref_end-2:ref_end])\n",
    "print(rseq[-2:])\n",
    "#说明ref_end实际上是开区间，这一点不被包含在rseq"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{915: 886,\n",
       " 914: 885,\n",
       " 913: 884,\n",
       " 912: 883,\n",
       " 911: 882,\n",
       " 910: 881,\n",
       " 909: 880,\n",
       " 908: 879,\n",
       " 907: 878,\n",
       " 906: 877,\n",
       " 905: 876,\n",
       " 904: 875,\n",
       " 903: 874,\n",
       " 902: 873,\n",
       " 901: 872,\n",
       " 900: 871,\n",
       " 899: 870,\n",
       " 898: 869,\n",
       " 897: 868,\n",
       " 896: 867,\n",
       " 895: 866,\n",
       " 894: 865,\n",
       " 893: 864,\n",
       " 892: 863,\n",
       " 891: 862,\n",
       " 890: 861,\n",
       " 889: 860,\n",
       " 888: 859,\n",
       " 887: 858,\n",
       " 886: 857,\n",
       " 885: 856,\n",
       " 884: 855,\n",
       " 883: 854,\n",
       " 882: 853,\n",
       " 881: 852,\n",
       " 880: 851,\n",
       " 879: 850,\n",
       " 878: 849,\n",
       " 877: 848,\n",
       " 876: 847,\n",
       " 875: 846,\n",
       " 874: 845,\n",
       " 873: 844,\n",
       " 872: 843,\n",
       " 871: 842,\n",
       " 870: 841,\n",
       " 869: 840,\n",
       " 868: 839,\n",
       " 867: 838,\n",
       " 866: 837,\n",
       " 865: 836,\n",
       " 864: 835,\n",
       " 863: 834,\n",
       " 862: 833,\n",
       " 861: 832,\n",
       " 860: 831,\n",
       " 859: 830,\n",
       " 858: 829,\n",
       " 857: 828,\n",
       " 856: 827,\n",
       " 855: 826,\n",
       " 854: 825,\n",
       " 853: 824,\n",
       " 852: 823,\n",
       " 851: 822,\n",
       " 850: 821,\n",
       " 849: 820,\n",
       " 848: 819,\n",
       " 847: 818,\n",
       " 846: 817,\n",
       " 845: 816,\n",
       " 844: 815,\n",
       " 843: 814,\n",
       " 842: 813,\n",
       " 841: 812,\n",
       " 840: 811,\n",
       " 839: 810,\n",
       " 838: 809,\n",
       " 837: 808,\n",
       " 836: 807,\n",
       " 835: 806,\n",
       " 834: 805,\n",
       " 833: 804,\n",
       " 832: 803,\n",
       " 831: 802,\n",
       " 830: 801,\n",
       " 829: 800,\n",
       " 828: 799,\n",
       " 827: 798,\n",
       " 826: 797,\n",
       " 825: 796,\n",
       " 824: 795,\n",
       " 823: 794,\n",
       " 822: 793,\n",
       " 821: 792,\n",
       " 820: 791,\n",
       " 819: 790,\n",
       " 818: 789,\n",
       " 817: 788,\n",
       " 816: 787,\n",
       " 815: 786,\n",
       " 814: 785,\n",
       " 813: 784,\n",
       " 812: 783,\n",
       " 811: 782,\n",
       " 810: 781,\n",
       " 809: 780,\n",
       " 808: 779,\n",
       " 807: 778,\n",
       " 806: 777,\n",
       " 805: 776,\n",
       " 804: 775,\n",
       " 803: 774,\n",
       " 802: 773,\n",
       " 801: 772,\n",
       " 800: 771,\n",
       " 799: 770,\n",
       " 798: 769,\n",
       " 797: 768,\n",
       " 796: 767,\n",
       " 795: 766,\n",
       " 794: 765,\n",
       " 793: 764,\n",
       " 792: 763,\n",
       " 791: 762,\n",
       " 790: 761,\n",
       " 789: 760,\n",
       " 788: 759,\n",
       " 787: 758,\n",
       " 786: 757,\n",
       " 785: 756,\n",
       " 784: 755,\n",
       " 783: 754,\n",
       " 782: 753,\n",
       " 781: 752,\n",
       " 780: 751,\n",
       " 779: 750,\n",
       " 778: 749,\n",
       " 777: 748,\n",
       " 776: 747,\n",
       " 775: 746,\n",
       " 774: 745,\n",
       " 772: 744,\n",
       " 771: 743,\n",
       " 770: 742,\n",
       " 769: 741,\n",
       " 768: 740,\n",
       " 767: 739,\n",
       " 766: 738,\n",
       " 765: 737,\n",
       " 764: 736,\n",
       " 763: 735,\n",
       " 762: 734,\n",
       " 761: 733,\n",
       " 760: 732,\n",
       " 759: 731,\n",
       " 758: 730,\n",
       " 757: 729,\n",
       " 756: 728,\n",
       " 755: 727,\n",
       " 754: 726,\n",
       " 753: 725,\n",
       " 752: 724,\n",
       " 751: 723,\n",
       " 750: 722,\n",
       " 749: 721,\n",
       " 748: 720,\n",
       " 747: 719,\n",
       " 746: 718,\n",
       " 745: 717,\n",
       " 744: 716,\n",
       " 743: 715,\n",
       " 742: 714,\n",
       " 741: 713,\n",
       " 740: 712,\n",
       " 739: 711,\n",
       " 738: 710,\n",
       " 737: 709,\n",
       " 736: 708,\n",
       " 735: 707,\n",
       " 734: 706,\n",
       " 733: 705,\n",
       " 732: 704,\n",
       " 731: 703,\n",
       " 730: 702,\n",
       " 729: 701,\n",
       " 728: 700,\n",
       " 727: 699,\n",
       " 726: 698,\n",
       " 725: 697,\n",
       " 724: 696,\n",
       " 723: 695,\n",
       " 722: 694,\n",
       " 721: 693,\n",
       " 720: 691,\n",
       " 719: 690,\n",
       " 718: 689,\n",
       " 717: 688,\n",
       " 716: 687,\n",
       " 715: 686,\n",
       " 714: 685,\n",
       " 713: 684,\n",
       " 712: 683,\n",
       " 711: 682,\n",
       " 710: 681,\n",
       " 709: 680,\n",
       " 708: 679,\n",
       " 707: 678,\n",
       " 706: 677,\n",
       " 705: 676,\n",
       " 704: 675,\n",
       " 703: 674,\n",
       " 702: 673,\n",
       " 701: 672,\n",
       " 700: 671,\n",
       " 699: 670,\n",
       " 698: 669,\n",
       " 697: 668,\n",
       " 696: 667,\n",
       " 695: 666,\n",
       " 694: 665,\n",
       " 693: 664,\n",
       " 692: 663,\n",
       " 691: 662,\n",
       " 690: 661,\n",
       " 689: 660,\n",
       " 688: 659,\n",
       " 687: 658,\n",
       " 686: 657,\n",
       " 685: 656,\n",
       " 684: 655,\n",
       " 683: 654,\n",
       " 682: 653,\n",
       " 681: 652,\n",
       " 680: 651,\n",
       " 679: 650,\n",
       " 678: 649,\n",
       " 677: 648,\n",
       " 676: 647,\n",
       " 675: 646,\n",
       " 674: 645,\n",
       " 673: 644,\n",
       " 672: 643,\n",
       " 671: 642,\n",
       " 670: 641,\n",
       " 669: 640,\n",
       " 668: 639,\n",
       " 667: 638,\n",
       " 666: 637,\n",
       " 665: 636,\n",
       " 664: 635,\n",
       " 663: 634,\n",
       " 662: 633,\n",
       " 661: 632,\n",
       " 660: 631,\n",
       " 659: 630,\n",
       " 658: 629,\n",
       " 657: 628,\n",
       " 656: 627,\n",
       " 655: 626,\n",
       " 654: 625,\n",
       " 653: 624,\n",
       " 652: 623,\n",
       " 651: 622,\n",
       " 650: 621,\n",
       " 649: 620,\n",
       " 648: 619,\n",
       " 647: 618,\n",
       " 646: 617,\n",
       " 645: 616,\n",
       " 644: 615,\n",
       " 643: 614,\n",
       " 642: 613,\n",
       " 641: 612,\n",
       " 640: 611,\n",
       " 639: 610,\n",
       " 638: 609,\n",
       " 637: 608,\n",
       " 636: 607,\n",
       " 635: 606,\n",
       " 634: 605,\n",
       " 633: 604,\n",
       " 632: 603,\n",
       " 631: 602,\n",
       " 630: 601,\n",
       " 629: 600,\n",
       " 628: 599,\n",
       " 627: 598,\n",
       " 626: 597,\n",
       " 625: 596,\n",
       " 624: 595,\n",
       " 623: 594,\n",
       " 622: 593,\n",
       " 621: 592,\n",
       " 620: 591,\n",
       " 619: 590,\n",
       " 618: 589,\n",
       " 617: 588,\n",
       " 616: 587,\n",
       " 615: 586,\n",
       " 614: 585,\n",
       " 613: 584,\n",
       " 612: 583,\n",
       " 611: 582,\n",
       " 610: 581,\n",
       " 609: 580,\n",
       " 608: 579,\n",
       " 607: 578,\n",
       " 606: 577,\n",
       " 605: 576,\n",
       " 604: 575,\n",
       " 603: 574,\n",
       " 602: 573,\n",
       " 601: 572,\n",
       " 600: 571,\n",
       " 599: 570,\n",
       " 598: 569,\n",
       " 597: 568,\n",
       " 596: 567,\n",
       " 595: 566,\n",
       " 594: 565,\n",
       " 593: 564,\n",
       " 592: 563,\n",
       " 591: 562,\n",
       " 590: 561,\n",
       " 589: 560,\n",
       " 588: 559,\n",
       " 587: 558,\n",
       " 586: 557,\n",
       " 585: 556,\n",
       " 584: 555,\n",
       " 583: 554,\n",
       " 582: 553,\n",
       " 581: 552,\n",
       " 580: 551,\n",
       " 579: 550,\n",
       " 574: 549,\n",
       " 573: 548,\n",
       " 572: 547,\n",
       " 571: 546,\n",
       " 570: 545,\n",
       " 569: 544,\n",
       " 568: 543,\n",
       " 567: 542,\n",
       " 566: 541,\n",
       " 565: 540,\n",
       " 564: 539,\n",
       " 563: 538,\n",
       " 562: 537,\n",
       " 561: 536,\n",
       " 560: 535,\n",
       " 559: 534,\n",
       " 558: 533,\n",
       " 557: 532,\n",
       " 556: 531,\n",
       " 555: 530,\n",
       " 554: 529,\n",
       " 553: 528,\n",
       " 552: 527,\n",
       " 551: 526,\n",
       " 550: 525,\n",
       " 549: 524,\n",
       " 548: 523,\n",
       " 547: 522,\n",
       " 546: 521,\n",
       " 545: 520,\n",
       " 544: 519,\n",
       " 543: 518,\n",
       " 542: 517,\n",
       " 541: 516,\n",
       " 540: 515,\n",
       " 539: 514,\n",
       " 538: 513,\n",
       " 537: 512,\n",
       " 536: 511,\n",
       " 535: 510,\n",
       " 534: 509,\n",
       " 533: 508,\n",
       " 532: 507,\n",
       " 531: 506,\n",
       " 530: 505,\n",
       " 529: 504,\n",
       " 528: 503,\n",
       " 527: 502,\n",
       " 526: 501,\n",
       " 525: 500,\n",
       " 524: 499,\n",
       " 523: 498,\n",
       " 522: 497,\n",
       " 521: 496,\n",
       " 520: 495,\n",
       " 519: 494,\n",
       " 518: 493,\n",
       " 517: 492,\n",
       " 516: 491,\n",
       " 515: 490,\n",
       " 514: 489,\n",
       " 513: 488,\n",
       " 512: 487,\n",
       " 511: 486,\n",
       " 510: 485,\n",
       " 509: 484,\n",
       " 508: 483,\n",
       " 507: 482,\n",
       " 506: 481,\n",
       " 505: 480,\n",
       " 504: 479,\n",
       " 503: 478,\n",
       " 502: 477,\n",
       " 501: 476,\n",
       " 500: 475,\n",
       " 499: 474,\n",
       " 498: 473,\n",
       " 497: 472,\n",
       " 496: 471,\n",
       " 495: 470,\n",
       " 494: 469,\n",
       " 493: 468,\n",
       " 492: 467,\n",
       " 491: 466,\n",
       " 490: 465,\n",
       " 489: 464,\n",
       " 488: 463,\n",
       " 487: 462,\n",
       " 486: 461,\n",
       " 485: 460,\n",
       " 484: 459,\n",
       " 483: 458,\n",
       " 482: 457,\n",
       " 481: 456,\n",
       " 480: 455,\n",
       " 479: 454,\n",
       " 478: 453,\n",
       " 477: 452,\n",
       " 476: 451,\n",
       " 475: 450,\n",
       " 474: 449,\n",
       " 473: 448,\n",
       " 472: 447,\n",
       " 471: 446,\n",
       " 470: 445,\n",
       " 469: 444,\n",
       " 468: 443,\n",
       " 467: 442,\n",
       " 466: 441,\n",
       " 465: 440,\n",
       " 464: 439,\n",
       " 463: 438,\n",
       " 462: 437,\n",
       " 461: 436,\n",
       " 460: 435,\n",
       " 459: 434,\n",
       " 458: 433,\n",
       " 457: 432,\n",
       " 456: 431,\n",
       " 455: 430,\n",
       " 454: 429,\n",
       " 453: 428,\n",
       " 452: 427,\n",
       " 451: 426,\n",
       " 450: 425,\n",
       " 449: 424,\n",
       " 448: 423,\n",
       " 447: 422,\n",
       " 446: 421,\n",
       " 445: 420,\n",
       " 444: 419,\n",
       " 443: 418,\n",
       " 442: 417,\n",
       " 441: 416,\n",
       " 440: 415,\n",
       " 439: 414,\n",
       " 438: 413,\n",
       " 437: 412,\n",
       " 436: 411,\n",
       " 435: 410,\n",
       " 434: 409,\n",
       " 433: 408,\n",
       " 432: 407,\n",
       " 431: 406,\n",
       " 430: 405,\n",
       " 429: 404,\n",
       " 428: 403,\n",
       " 427: 402,\n",
       " 426: 401,\n",
       " 425: 400,\n",
       " 424: 399,\n",
       " 423: 398,\n",
       " 422: 397,\n",
       " 421: 396,\n",
       " 420: 395,\n",
       " 419: 394,\n",
       " 418: 393,\n",
       " 417: 392,\n",
       " 416: 391,\n",
       " 415: 390,\n",
       " 414: 389,\n",
       " 413: 388,\n",
       " 412: 387,\n",
       " 411: 386,\n",
       " 410: 385,\n",
       " 409: 384,\n",
       " 408: 383,\n",
       " 407: 382,\n",
       " 406: 381,\n",
       " 405: 380,\n",
       " 404: 379,\n",
       " 403: 378,\n",
       " 402: 377,\n",
       " 401: 376,\n",
       " 400: 375,\n",
       " 399: 374,\n",
       " 398: 373,\n",
       " 397: 372,\n",
       " 396: 371,\n",
       " 395: 370,\n",
       " 394: 369,\n",
       " 393: 368,\n",
       " 392: 367,\n",
       " 391: 366,\n",
       " 390: 365,\n",
       " 389: 364,\n",
       " 388: 363,\n",
       " 387: 362,\n",
       " 386: 361,\n",
       " 384: 360,\n",
       " 383: 359,\n",
       " 382: 358,\n",
       " 381: 357,\n",
       " 380: 356,\n",
       " 379: 355,\n",
       " 378: 354,\n",
       " 377: 353,\n",
       " 376: 352,\n",
       " 375: 351,\n",
       " 374: 350,\n",
       " 373: 349,\n",
       " 372: 348,\n",
       " 371: 347,\n",
       " 370: 346,\n",
       " 369: 345,\n",
       " 368: 344,\n",
       " 367: 343,\n",
       " 366: 342,\n",
       " 365: 341,\n",
       " 364: 340,\n",
       " 363: 339,\n",
       " 362: 338,\n",
       " 361: 337,\n",
       " 360: 336,\n",
       " 359: 335,\n",
       " 358: 334,\n",
       " 357: 333,\n",
       " 356: 332,\n",
       " 355: 331,\n",
       " 354: 330,\n",
       " 353: 329,\n",
       " 352: 328,\n",
       " 351: 327,\n",
       " 350: 326,\n",
       " 349: 325,\n",
       " 348: 324,\n",
       " 347: 323,\n",
       " 346: 322,\n",
       " 345: 321,\n",
       " 344: 320,\n",
       " 343: 319,\n",
       " 342: 318,\n",
       " 341: 317,\n",
       " 340: 316,\n",
       " 339: 315,\n",
       " 338: 314,\n",
       " 337: 313,\n",
       " 336: 312,\n",
       " 335: 311,\n",
       " 334: 310,\n",
       " 333: 309,\n",
       " 332: 308,\n",
       " 331: 307,\n",
       " 330: 306,\n",
       " 329: 305,\n",
       " 328: 304,\n",
       " 327: 303,\n",
       " 326: 302,\n",
       " 325: 301,\n",
       " 324: 300,\n",
       " 323: 299,\n",
       " 322: 298,\n",
       " 321: 297,\n",
       " 320: 296,\n",
       " 319: 295,\n",
       " 318: 294,\n",
       " 317: 293,\n",
       " 316: 292,\n",
       " 314: 291,\n",
       " 313: 290,\n",
       " 312: 289,\n",
       " 311: 288,\n",
       " 310: 287,\n",
       " 309: 286,\n",
       " 308: 285,\n",
       " 307: 284,\n",
       " 306: 283,\n",
       " 305: 282,\n",
       " 304: 281,\n",
       " 303: 280,\n",
       " 302: 279,\n",
       " 301: 278,\n",
       " 300: 277,\n",
       " 299: 276,\n",
       " 298: 275,\n",
       " 297: 274,\n",
       " 296: 273,\n",
       " 295: 270,\n",
       " 294: 269,\n",
       " 293: 268,\n",
       " 292: 267,\n",
       " 291: 266,\n",
       " 290: 265,\n",
       " 289: 264,\n",
       " 288: 263,\n",
       " 287: 262,\n",
       " 286: 261,\n",
       " 285: 260,\n",
       " 284: 259,\n",
       " 283: 258,\n",
       " 282: 257,\n",
       " 281: 256,\n",
       " 280: 255,\n",
       " 279: 254,\n",
       " 278: 253,\n",
       " 277: 252,\n",
       " 276: 251,\n",
       " 275: 250,\n",
       " 274: 249,\n",
       " 273: 248,\n",
       " 272: 247,\n",
       " 271: 246,\n",
       " 270: 245,\n",
       " 269: 244,\n",
       " 268: 243,\n",
       " 267: 242,\n",
       " 266: 241,\n",
       " 265: 240,\n",
       " 264: 239,\n",
       " 263: 238,\n",
       " 262: 237,\n",
       " 261: 236,\n",
       " 260: 235,\n",
       " 259: 234,\n",
       " 258: 233,\n",
       " 257: 232,\n",
       " 256: 231,\n",
       " 255: 230,\n",
       " 254: 229,\n",
       " 253: 228,\n",
       " 252: 227,\n",
       " 251: 226,\n",
       " 250: 225,\n",
       " 249: 224,\n",
       " 248: 223,\n",
       " 247: 222,\n",
       " 246: 221,\n",
       " 245: 220,\n",
       " 244: 219,\n",
       " 243: 218,\n",
       " 242: 217,\n",
       " 241: 216,\n",
       " 240: 215,\n",
       " 239: 214,\n",
       " 238: 213,\n",
       " 237: 212,\n",
       " 236: 211,\n",
       " 235: 210,\n",
       " 234: 209,\n",
       " 233: 208,\n",
       " 232: 207,\n",
       " 231: 206,\n",
       " 230: 205,\n",
       " 229: 204,\n",
       " 228: 203,\n",
       " 227: 202,\n",
       " 226: 201,\n",
       " 225: 200,\n",
       " 224: 199,\n",
       " 223: 198,\n",
       " 222: 197,\n",
       " 221: 196,\n",
       " 220: 195,\n",
       " 219: 194,\n",
       " 218: 193,\n",
       " 217: 192,\n",
       " 216: 191,\n",
       " 215: 190,\n",
       " 214: 189,\n",
       " 213: 188,\n",
       " 212: 187,\n",
       " 211: 186,\n",
       " 210: 185,\n",
       " 209: 184,\n",
       " 208: 183,\n",
       " 207: 182,\n",
       " 206: 181,\n",
       " 205: 180,\n",
       " 204: 179,\n",
       " 203: 178,\n",
       " 202: 177,\n",
       " 201: 176,\n",
       " 200: 175,\n",
       " 199: 174,\n",
       " 198: 173,\n",
       " 197: 172,\n",
       " 196: 171,\n",
       " 195: 170,\n",
       " 194: 169,\n",
       " 193: 168,\n",
       " 192: 167,\n",
       " 191: 166,\n",
       " 190: 165,\n",
       " 189: 164,\n",
       " 188: 163,\n",
       " 187: 162,\n",
       " 186: 161,\n",
       " 185: 160,\n",
       " 184: 159,\n",
       " 183: 158,\n",
       " 182: 157,\n",
       " 181: 156,\n",
       " 180: 155,\n",
       " 179: 154,\n",
       " 178: 153,\n",
       " 177: 152,\n",
       " 176: 151,\n",
       " 175: 150,\n",
       " 174: 149,\n",
       " 173: 148,\n",
       " 172: 147,\n",
       " 171: 146,\n",
       " 170: 145,\n",
       " 169: 144,\n",
       " 168: 143,\n",
       " 167: 142,\n",
       " 166: 141,\n",
       " 165: 140,\n",
       " 164: 139,\n",
       " 163: 138,\n",
       " 162: 137,\n",
       " 161: 136,\n",
       " 160: 135,\n",
       " 159: 134,\n",
       " 158: 133,\n",
       " 157: 132,\n",
       " 156: 131,\n",
       " 155: 130,\n",
       " 154: 129,\n",
       " 153: 128,\n",
       " 152: 127,\n",
       " 151: 126,\n",
       " 150: 125,\n",
       " 149: 124,\n",
       " 148: 123,\n",
       " 147: 122,\n",
       " 146: 121,\n",
       " 145: 120,\n",
       " 144: 119,\n",
       " 143: 118,\n",
       " 142: 117,\n",
       " 141: 116,\n",
       " 140: 115,\n",
       " 139: 114,\n",
       " 138: 113,\n",
       " 137: 112,\n",
       " 136: 111,\n",
       " 135: 110,\n",
       " 134: 109,\n",
       " 133: 108,\n",
       " 132: 107,\n",
       " 131: 106,\n",
       " 130: 105,\n",
       " 129: 104,\n",
       " 128: 103,\n",
       " 127: 102,\n",
       " 126: 101,\n",
       " 125: 100,\n",
       " 124: 99,\n",
       " 123: 98,\n",
       " 122: 97,\n",
       " 121: 96,\n",
       " 120: 95,\n",
       " 119: 94,\n",
       " 118: 93,\n",
       " 117: 92,\n",
       " 116: 91,\n",
       " 115: 90,\n",
       " 114: 89,\n",
       " 113: 88,\n",
       " 112: 87,\n",
       " 111: 86,\n",
       " 110: 85,\n",
       " 109: 84,\n",
       " 108: 83,\n",
       " 107: 82,\n",
       " 106: 81,\n",
       " 105: 80,\n",
       " 104: 79,\n",
       " 103: 78,\n",
       " 102: 77,\n",
       " 101: 76,\n",
       " 100: 75,\n",
       " 99: 74,\n",
       " 98: 73,\n",
       " 97: 72,\n",
       " 96: 71,\n",
       " 95: 70,\n",
       " 94: 69,\n",
       " 93: 68,\n",
       " 92: 67,\n",
       " 91: 66,\n",
       " 90: 65,\n",
       " 89: 64,\n",
       " 88: 63,\n",
       " 87: 62,\n",
       " 86: 61,\n",
       " 85: 60,\n",
       " 84: 59,\n",
       " 83: 58,\n",
       " 82: 57,\n",
       " 81: 56,\n",
       " 80: 55,\n",
       " 79: 54,\n",
       " 78: 53,\n",
       " 77: 52,\n",
       " 76: 51,\n",
       " 75: 50,\n",
       " 74: 49,\n",
       " 73: 48,\n",
       " 72: 47,\n",
       " 71: 46,\n",
       " 70: 45,\n",
       " 69: 44,\n",
       " 68: 43,\n",
       " 67: 42,\n",
       " 66: 41,\n",
       " 65: 40,\n",
       " 64: 39,\n",
       " 63: 38,\n",
       " 62: 37,\n",
       " 61: 36,\n",
       " 60: 35,\n",
       " 59: 34,\n",
       " 58: 33,\n",
       " 57: 32,\n",
       " 56: 31,\n",
       " 55: 30,\n",
       " 54: 29,\n",
       " 50: 28,\n",
       " 49: 27,\n",
       " 48: 26,\n",
       " 47: 25,\n",
       " 46: 24,\n",
       " 45: 23,\n",
       " 44: 22,\n",
       " 43: 21,\n",
       " 42: 20,\n",
       " 41: 19,\n",
       " 40: 18,\n",
       " 39: 17,\n",
       " 38: 16,\n",
       " 37: 15,\n",
       " 36: 14,\n",
       " 35: 13,\n",
       " 34: 12,\n",
       " 33: 11,\n",
       " 32: 10,\n",
       " 31: 9,\n",
       " 30: 8,\n",
       " 29: 7,\n",
       " 28: 6,\n",
       " 27: 5,\n",
       " 26: 4,\n",
       " 25: 3,\n",
       " 24: 2,\n",
       " 23: 1}"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "q_to_r_poss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CG\n",
      "\n",
      "CG\n",
      "\n",
      "CG\n",
      "\n",
      "CG\n",
      "\n",
      "CG\n",
      "\n"
     ]
    }
   ],
   "source": [
    "for i in query_pos:\n",
    "    if i not in q_to_r_poss.keys():\n",
    "        continue\n",
    "    print(rseq_complement[q_to_r_poss[i]:q_to_r_poss[i+2]],end='')\n",
    "    print('\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for i in ref_poss:\n",
    "    print([i],end='')\n",
    "print('\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CG\n",
      "\n",
      "CG\n",
      "\n",
      "CG\n",
      "\n",
      "CG\n",
      "\n",
      "CG\n",
      "\n"
     ]
    }
   ],
   "source": [
    "for i in ref_poss:\n",
    "    print(contigs[reference_name][i-1:i+1],end='')\n",
    "    print('\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CGCGCGCGCGCG\n",
      "\n"
     ]
    }
   ],
   "source": [
    "for i in query_pos:\n",
    "        print(seq[i:i+2],end='')\n",
    "print('\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "from deepsignal3.utils.process_utils import get_motif_seqs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "motif_seqs=get_motif_seqs('CG')\n",
    "methyloc=0\n",
    "tsite_locs = get_refloc_of_methysite_in_motif(seq, set(motif_seqs), methyloc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CG\n",
      "CG\n",
      "CG\n",
      "CG\n",
      "CG\n",
      "CG\n"
     ]
    }
   ],
   "source": [
    "for loc_in_read in tsite_locs:\n",
    "    print(seq[loc_in_read:loc_in_read+2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "919\n"
     ]
    }
   ],
   "source": [
    "print(bam_read.infer_query_length())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3S142M1I52M1D142M4I189M1I69M1I19M2D242M3I28M23S\n"
     ]
    }
   ],
   "source": [
    "print(bam_read.cigarstring)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_bed(bisulfite_bed):\n",
    "    key_sep = \"||\"\n",
    "    freqinfo = {}\n",
    "    pos=0\n",
    "    neg=0\n",
    "    with open(bisulfite_bed, \"r\") as rf:\n",
    "        for line in rf:\n",
    "            words = line.strip().split()\n",
    "            if int(words[9])<5:\n",
    "                continue\n",
    "            m_key = key_sep.join([words[0], words[1]])\n",
    "            freqinfo[m_key] = float(words[10])\n",
    "            if float(words[10])>=100.0:\n",
    "                pos+=1\n",
    "            elif float(words[10])<=0.0:\n",
    "                neg+=1\n",
    "    print(pos)\n",
    "    print(neg)\n",
    "    return freqinfo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "9734158\n",
      "5630249\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'chr1||3369': 91.5,\n",
       " 'chr1||3375': 91.5,\n",
       " 'chr1||3377': 91.5,\n",
       " 'chr1||3388': 91.5,\n",
       " 'chr1||3408': 80.0,\n",
       " 'chr1||3416': 75.0,\n",
       " 'chr1||3422': 83.0,\n",
       " 'chr1||3426': 58.5,\n",
       " 'chr1||3443': 80.0,\n",
       " 'chr1||3459': 74.5,\n",
       " 'chr1||3467': 74.5,\n",
       " 'chr1||3469': 63.5,\n",
       " 'chr1||3485': 88.5,\n",
       " 'chr1||3498': 70.0,\n",
       " 'chr1||3542': 91.5,\n",
       " 'chr1||3543': 62.5,\n",
       " 'chr1||3555': 58.0,\n",
       " 'chr1||3556': 57.0,\n",
       " 'chr1||3558': 90.0,\n",
       " 'chr1||3559': 100.0,\n",
       " 'chr1||3583': 83.0,\n",
       " 'chr1||3584': 76.5,\n",
       " 'chr1||3596': 83.0,\n",
       " 'chr1||3597': 71.5,\n",
       " 'chr1||3601': 73.5,\n",
       " 'chr1||3602': 72.0,\n",
       " 'chr1||3604': 81.5,\n",
       " 'chr1||3605': 80.0,\n",
       " 'chr1||3624': 50.0,\n",
       " 'chr1||3625': 30.0,\n",
       " 'chr1||3662': 78.5,\n",
       " 'chr1||3663': 100.0,\n",
       " 'chr1||3666': 73.0,\n",
       " 'chr1||3667': 100.0,\n",
       " 'chr1||3692': 41.5,\n",
       " 'chr1||3693': 54.0,\n",
       " 'chr1||3724': 53.5,\n",
       " 'chr1||3725': 65.0,\n",
       " 'chr1||3728': 74.5,\n",
       " 'chr1||3729': 64.0,\n",
       " 'chr1||3779': 69.0,\n",
       " 'chr1||3780': 76.5,\n",
       " 'chr1||3786': 77.5,\n",
       " 'chr1||3787': 73.5,\n",
       " 'chr1||5255': 69.0,\n",
       " 'chr1||5256': 46.0,\n",
       " 'chr1||5551': 83.0,\n",
       " 'chr1||5552': 74.0,\n",
       " 'chr1||5636': 27.0,\n",
       " 'chr1||5637': 16.5,\n",
       " 'chr1||5713': 93.0,\n",
       " 'chr1||5714': 80.0,\n",
       " 'chr1||5761': 54.0,\n",
       " 'chr1||5970': 78.5,\n",
       " 'chr1||6028': 21.5,\n",
       " 'chr1||6342': 72.5,\n",
       " 'chr1||6400': 48.0,\n",
       " 'chr1||6452': 24.5,\n",
       " 'chr1||7447': 93.0,\n",
       " 'chr1||7448': 87.5,\n",
       " 'chr1||7483': 93.0,\n",
       " 'chr1||7484': 90.0,\n",
       " 'chr1||7509': 100.0,\n",
       " 'chr1||7510': 86.0,\n",
       " 'chr1||9635': 20.0,\n",
       " 'chr1||14600': 75.0,\n",
       " 'chr1||14857': 54.5,\n",
       " 'chr1||14882': 36.5,\n",
       " 'chr1||14990': 86.0,\n",
       " 'chr1||14991': 100.0,\n",
       " 'chr1||14996': 92.0,\n",
       " 'chr1||14997': 100.0,\n",
       " 'chr1||15027': 94.0,\n",
       " 'chr1||15028': 100.0,\n",
       " 'chr1||15047': 68.0,\n",
       " 'chr1||15048': 81.5,\n",
       " 'chr1||15051': 85.0,\n",
       " 'chr1||15052': 100.0,\n",
       " 'chr1||15068': 38.5,\n",
       " 'chr1||15069': 93.0,\n",
       " 'chr1||15222': 0.0,\n",
       " 'chr1||16017': 46.5,\n",
       " 'chr1||17714': 80.5,\n",
       " 'chr1||17744': 100.0,\n",
       " 'chr1||17745': 94.5,\n",
       " 'chr1||17760': 100.0,\n",
       " 'chr1||17761': 95.0,\n",
       " 'chr1||17770': 94.5,\n",
       " 'chr1||17771': 90.0,\n",
       " 'chr1||17795': 100.0,\n",
       " 'chr1||17796': 89.5,\n",
       " 'chr1||17815': 35.0,\n",
       " 'chr1||17816': 35.5,\n",
       " 'chr1||17977': 67.0,\n",
       " 'chr1||17978': 53.5,\n",
       " 'chr1||18664': 70.0,\n",
       " 'chr1||18713': 4.5,\n",
       " 'chr1||19560': 84.5,\n",
       " 'chr1||19574': 58.5,\n",
       " 'chr1||19614': 47.0,\n",
       " 'chr1||19800': 76.0,\n",
       " 'chr1||19806': 61.0,\n",
       " 'chr1||19908': 29.0,\n",
       " 'chr1||20026': 40.0,\n",
       " 'chr1||20351': 64.0,\n",
       " 'chr1||20369': 89.0,\n",
       " 'chr1||20384': 60.0,\n",
       " 'chr1||20412': 6.0,\n",
       " 'chr1||20547': 18.5,\n",
       " 'chr1||20597': 29.5,\n",
       " 'chr1||20606': 72.0,\n",
       " 'chr1||20624': 92.0,\n",
       " 'chr1||20627': 72.0,\n",
       " 'chr1||20641': 36.5,\n",
       " 'chr1||20684': 14.0,\n",
       " 'chr1||20706': 26.5,\n",
       " 'chr1||20772': 94.0,\n",
       " 'chr1||20790': 44.5,\n",
       " 'chr1||21506': 16.0,\n",
       " 'chr1||21533': 93.0,\n",
       " 'chr1||21545': 93.0,\n",
       " 'chr1||21562': 100.0,\n",
       " 'chr1||21589': 26.5,\n",
       " 'chr1||22110': 100.0,\n",
       " 'chr1||22117': 60.5,\n",
       " 'chr1||22151': 59.0,\n",
       " 'chr1||22185': 78.5,\n",
       " 'chr1||22197': 100.0,\n",
       " 'chr1||22205': 91.0,\n",
       " 'chr1||22215': 100.0,\n",
       " 'chr1||22240': 69.0,\n",
       " 'chr1||22245': 94.5,\n",
       " 'chr1||22271': 62.0,\n",
       " 'chr1||22306': 84.5,\n",
       " 'chr1||22324': 100.0,\n",
       " 'chr1||22327': 91.5,\n",
       " 'chr1||22534': 58.5,\n",
       " 'chr1||22574': 50.0,\n",
       " 'chr1||22599': 38.5,\n",
       " 'chr1||22703': 12.5,\n",
       " 'chr1||22806': 86.0,\n",
       " 'chr1||22807': 90.5,\n",
       " 'chr1||22812': 100.0,\n",
       " 'chr1||22813': 90.0,\n",
       " 'chr1||22818': 91.5,\n",
       " 'chr1||22819': 93.0,\n",
       " 'chr1||22916': 32.5,\n",
       " 'chr1||22917': 48.5,\n",
       " 'chr1||22955': 86.0,\n",
       " 'chr1||22956': 82.5,\n",
       " 'chr1||22967': 68.0,\n",
       " 'chr1||22968': 72.5,\n",
       " 'chr1||23065': 47.5,\n",
       " 'chr1||23066': 24.0,\n",
       " 'chr1||23165': 46.0,\n",
       " 'chr1||23166': 33.0,\n",
       " 'chr1||23353': 100.0,\n",
       " 'chr1||24075': 83.0,\n",
       " 'chr1||24123': 81.5,\n",
       " 'chr1||24136': 88.5,\n",
       " 'chr1||24169': 71.0,\n",
       " 'chr1||24183': 50.0,\n",
       " 'chr1||24194': 59.5,\n",
       " 'chr1||24216': 76.5,\n",
       " 'chr1||24230': 100.0,\n",
       " 'chr1||24241': 87.5,\n",
       " 'chr1||24263': 100.0,\n",
       " 'chr1||26236': 91.5,\n",
       " 'chr1||26250': 100.0,\n",
       " 'chr1||26259': 63.0,\n",
       " 'chr1||26261': 93.0,\n",
       " 'chr1||26268': 100.0,\n",
       " 'chr1||26283': 39.5,\n",
       " 'chr1||26314': 63.0,\n",
       " 'chr1||26368': 90.0,\n",
       " 'chr1||26948': 100.0,\n",
       " 'chr1||26957': 100.0,\n",
       " 'chr1||26958': 100.0,\n",
       " 'chr1||27074': 2.5,\n",
       " 'chr1||27075': 9.5,\n",
       " 'chr1||27114': 63.0,\n",
       " 'chr1||27115': 77.0,\n",
       " 'chr1||27136': 25.5,\n",
       " 'chr1||27137': 41.0,\n",
       " 'chr1||28478': 77.5,\n",
       " 'chr1||28484': 51.0,\n",
       " 'chr1||28544': 48.5,\n",
       " 'chr1||28556': 78.5,\n",
       " 'chr1||28653': 0.0,\n",
       " 'chr1||28712': 28.5,\n",
       " 'chr1||28720': 35.5,\n",
       " 'chr1||28779': 67.5,\n",
       " 'chr1||28883': 78.5,\n",
       " 'chr1||28885': 80.0,\n",
       " 'chr1||28957': 62.5,\n",
       " 'chr1||28958': 71.5,\n",
       " 'chr1||28961': 67.0,\n",
       " 'chr1||28989': 51.5,\n",
       " 'chr1||28990': 41.5,\n",
       " 'chr1||29111': 38.0,\n",
       " 'chr1||29112': 40.5,\n",
       " 'chr1||30109': 39.0,\n",
       " 'chr1||32026': 37.5,\n",
       " 'chr1||32048': 66.5,\n",
       " 'chr1||32127': 25.0,\n",
       " 'chr1||35161': 50.0,\n",
       " 'chr1||35162': 35.5,\n",
       " 'chr1||35273': 82.5,\n",
       " 'chr1||35274': 70.0,\n",
       " 'chr1||35323': 94.0,\n",
       " 'chr1||35324': 94.5,\n",
       " 'chr1||35339': 59.0,\n",
       " 'chr1||35340': 100.0,\n",
       " 'chr1||35392': 56.0,\n",
       " 'chr1||35415': 54.0,\n",
       " 'chr1||35502': 91.5,\n",
       " 'chr1||35639': 28.5,\n",
       " 'chr1||35688': 100.0,\n",
       " 'chr1||35699': 100.0,\n",
       " 'chr1||35704': 100.0,\n",
       " 'chr1||35748': 100.0,\n",
       " 'chr1||35843': 76.0,\n",
       " 'chr1||35861': 100.0,\n",
       " 'chr1||35875': 100.0,\n",
       " 'chr1||35937': 48.5,\n",
       " 'chr1||35942': 80.0,\n",
       " 'chr1||36015': 100.0,\n",
       " 'chr1||37935': 65.5,\n",
       " 'chr1||37957': 55.0,\n",
       " 'chr1||37977': 46.5,\n",
       " 'chr1||41753': 80.0,\n",
       " 'chr1||41793': 0.0,\n",
       " 'chr1||41875': 30.5,\n",
       " 'chr1||41876': 42.5,\n",
       " 'chr1||41893': 4.5,\n",
       " 'chr1||41894': 14.0,\n",
       " 'chr1||41912': 33.5,\n",
       " 'chr1||41913': 33.0,\n",
       " 'chr1||41961': 90.0,\n",
       " 'chr1||41962': 78.0,\n",
       " 'chr1||42025': 58.5,\n",
       " 'chr1||42072': 20.0,\n",
       " 'chr1||43403': 31.5,\n",
       " 'chr1||44814': 29.0,\n",
       " 'chr1||44815': 18.5,\n",
       " 'chr1||44866': 30.0,\n",
       " 'chr1||44867': 22.0,\n",
       " 'chr1||44923': 80.5,\n",
       " 'chr1||44924': 56.0,\n",
       " 'chr1||44956': 8.5,\n",
       " 'chr1||44957': 5.0,\n",
       " 'chr1||44987': 66.5,\n",
       " 'chr1||44988': 78.5,\n",
       " 'chr1||52150': 38.5,\n",
       " 'chr1||52151': 38.0,\n",
       " 'chr1||56090': 45.0,\n",
       " 'chr1||58571': 0.0,\n",
       " 'chr1||60742': 38.0,\n",
       " 'chr1||60798': 4.0,\n",
       " 'chr1||60799': 14.0,\n",
       " 'chr1||60887': 6.0,\n",
       " 'chr1||60931': 78.0,\n",
       " 'chr1||60941': 88.0,\n",
       " 'chr1||65954': 100.0,\n",
       " 'chr1||65955': 91.0,\n",
       " 'chr1||65973': 100.0,\n",
       " 'chr1||65974': 100.0,\n",
       " 'chr1||66940': 13.5,\n",
       " 'chr1||67393': 34.5,\n",
       " 'chr1||67716': 30.0,\n",
       " 'chr1||70140': 50.0,\n",
       " 'chr1||70186': 73.0,\n",
       " 'chr1||70200': 58.5,\n",
       " 'chr1||71819': 24.5,\n",
       " 'chr1||73029': 72.0,\n",
       " 'chr1||73060': 94.0,\n",
       " 'chr1||73062': 86.5,\n",
       " 'chr1||76403': 36.0,\n",
       " 'chr1||76655': 60.0,\n",
       " 'chr1||86471': 81.5,\n",
       " 'chr1||86479': 55.5,\n",
       " 'chr1||117074': 22.0,\n",
       " 'chr1||118326': 0.0,\n",
       " 'chr1||118368': 60.0,\n",
       " 'chr1||118744': 50.0,\n",
       " 'chr1||118993': 34.0,\n",
       " 'chr1||122466': 63.0,\n",
       " 'chr1||122467': 79.5,\n",
       " 'chr1||122559': 54.0,\n",
       " 'chr1||122560': 70.5,\n",
       " 'chr1||123005': 16.0,\n",
       " 'chr1||125306': 15.5,\n",
       " 'chr1||125307': 6.0,\n",
       " 'chr1||125457': 0.0,\n",
       " 'chr1||125458': 0.0,\n",
       " 'chr1||125516': 53.5,\n",
       " 'chr1||125517': 55.5,\n",
       " 'chr1||125532': 55.5,\n",
       " 'chr1||125533': 36.5,\n",
       " 'chr1||126308': 69.5,\n",
       " 'chr1||126468': 0.0,\n",
       " 'chr1||126469': 0.0,\n",
       " 'chr1||126524': 46.0,\n",
       " 'chr1||127513': 29.5,\n",
       " 'chr1||127514': 28.0,\n",
       " 'chr1||127550': 17.0,\n",
       " 'chr1||127551': 35.0,\n",
       " 'chr1||127587': 33.5,\n",
       " 'chr1||127588': 21.5,\n",
       " 'chr1||127814': 87.5,\n",
       " 'chr1||127815': 90.0,\n",
       " 'chr1||128240': 100.0,\n",
       " 'chr1||128291': 65.0,\n",
       " 'chr1||128331': 58.0,\n",
       " 'chr1||128363': 26.0,\n",
       " 'chr1||128481': 75.5,\n",
       " 'chr1||128523': 32.0,\n",
       " 'chr1||128565': 47.0,\n",
       " 'chr1||128594': 77.5,\n",
       " 'chr1||131804': 35.0,\n",
       " 'chr1||134599': 83.5,\n",
       " 'chr1||134600': 100.0,\n",
       " 'chr1||136415': 86.5,\n",
       " 'chr1||136625': 76.5,\n",
       " 'chr1||136811': 94.0,\n",
       " 'chr1||136812': 94.5,\n",
       " 'chr1||136815': 90.0,\n",
       " 'chr1||136816': 100.0,\n",
       " 'chr1||136819': 100.0,\n",
       " 'chr1||136820': 100.0,\n",
       " 'chr1||136821': 100.0,\n",
       " 'chr1||136822': 100.0,\n",
       " 'chr1||136831': 100.0,\n",
       " 'chr1||136832': 100.0,\n",
       " 'chr1||136859': 94.0,\n",
       " 'chr1||136864': 53.0,\n",
       " 'chr1||136868': 75.0,\n",
       " 'chr1||136875': 67.5,\n",
       " 'chr1||136890': 90.0,\n",
       " 'chr1||136900': 100.0,\n",
       " 'chr1||136901': 90.0,\n",
       " 'chr1||136909': 100.0,\n",
       " 'chr1||136910': 100.0,\n",
       " 'chr1||136949': 83.5,\n",
       " 'chr1||136953': 74.5,\n",
       " 'chr1||136954': 84.5,\n",
       " 'chr1||136961': 100.0,\n",
       " 'chr1||136962': 100.0,\n",
       " 'chr1||136965': 65.5,\n",
       " 'chr1||136966': 80.0,\n",
       " 'chr1||137791': 72.5,\n",
       " 'chr1||138048': 62.0,\n",
       " 'chr1||138098': 76.5,\n",
       " 'chr1||138262': 70.0,\n",
       " 'chr1||138675': 77.5,\n",
       " 'chr1||138676': 84.5,\n",
       " 'chr1||138765': 59.5,\n",
       " 'chr1||138766': 69.0,\n",
       " 'chr1||138810': 76.5,\n",
       " 'chr1||138811': 78.0,\n",
       " 'chr1||138826': 82.0,\n",
       " 'chr1||138827': 80.5,\n",
       " 'chr1||138845': 89.0,\n",
       " 'chr1||138846': 94.0,\n",
       " 'chr1||138850': 88.5,\n",
       " 'chr1||138851': 100.0,\n",
       " 'chr1||138864': 100.0,\n",
       " 'chr1||138865': 100.0,\n",
       " 'chr1||138873': 100.0,\n",
       " 'chr1||138874': 100.0,\n",
       " 'chr1||138880': 88.0,\n",
       " 'chr1||138881': 90.5,\n",
       " 'chr1||138932': 80.0,\n",
       " 'chr1||138933': 59.5,\n",
       " 'chr1||138936': 76.5,\n",
       " 'chr1||138937': 69.5,\n",
       " 'chr1||138977': 33.5,\n",
       " 'chr1||138978': 38.5,\n",
       " 'chr1||138988': 60.0,\n",
       " 'chr1||138989': 71.0,\n",
       " 'chr1||139008': 65.5,\n",
       " 'chr1||139009': 73.5,\n",
       " 'chr1||139022': 69.5,\n",
       " 'chr1||139023': 62.0,\n",
       " 'chr1||139033': 89.0,\n",
       " 'chr1||139034': 91.5,\n",
       " 'chr1||139038': 88.0,\n",
       " 'chr1||139039': 86.5,\n",
       " 'chr1||139078': 82.5,\n",
       " 'chr1||139079': 68.0,\n",
       " 'chr1||139082': 100.0,\n",
       " 'chr1||139083': 84.0,\n",
       " 'chr1||140257': 21.0,\n",
       " 'chr1||141190': 44.0,\n",
       " 'chr1||141948': 45.0,\n",
       " 'chr1||142753': 100.0,\n",
       " 'chr1||142758': 100.0,\n",
       " 'chr1||143813': 74.5,\n",
       " 'chr1||143814': 100.0,\n",
       " 'chr1||143941': 91.5,\n",
       " 'chr1||144983': 74.0,\n",
       " 'chr1||145242': 47.5,\n",
       " 'chr1||145346': 63.5,\n",
       " 'chr1||145400': 81.5,\n",
       " 'chr1||145432': 65.0,\n",
       " 'chr1||145495': 38.5,\n",
       " 'chr1||145539': 77.5,\n",
       " 'chr1||148558': 58.5,\n",
       " 'chr1||148594': 81.0,\n",
       " 'chr1||148607': 100.0,\n",
       " 'chr1||148622': 100.0,\n",
       " 'chr1||148623': 100.0,\n",
       " 'chr1||148639': 100.0,\n",
       " 'chr1||148640': 100.0,\n",
       " 'chr1||148644': 100.0,\n",
       " 'chr1||148645': 100.0,\n",
       " 'chr1||148656': 90.0,\n",
       " 'chr1||148657': 58.5,\n",
       " 'chr1||148685': 100.0,\n",
       " 'chr1||148686': 94.0,\n",
       " 'chr1||148689': 85.0,\n",
       " 'chr1||148690': 89.5,\n",
       " 'chr1||148739': 75.5,\n",
       " 'chr1||148740': 68.5,\n",
       " 'chr1||148808': 100.0,\n",
       " 'chr1||148821': 100.0,\n",
       " 'chr1||148857': 100.0,\n",
       " 'chr1||149102': 100.0,\n",
       " 'chr1||149138': 94.5,\n",
       " 'chr1||149171': 94.0,\n",
       " 'chr1||149188': 100.0,\n",
       " 'chr1||149208': 100.0,\n",
       " 'chr1||149224': 100.0,\n",
       " 'chr1||149258': 100.0,\n",
       " 'chr1||149263': 80.0,\n",
       " 'chr1||149281': 100.0,\n",
       " 'chr1||149285': 100.0,\n",
       " 'chr1||149367': 90.0,\n",
       " 'chr1||149440': 90.0,\n",
       " 'chr1||149525': 89.0,\n",
       " 'chr1||149558': 100.0,\n",
       " 'chr1||149613': 100.0,\n",
       " 'chr1||149631': 76.5,\n",
       " 'chr1||149647': 100.0,\n",
       " 'chr1||149659': 100.0,\n",
       " 'chr1||149768': 100.0,\n",
       " 'chr1||149786': 94.0,\n",
       " 'chr1||149792': 66.5,\n",
       " 'chr1||149823': 87.5,\n",
       " 'chr1||149841': 100.0,\n",
       " 'chr1||153660': 80.0,\n",
       " 'chr1||153666': 100.0,\n",
       " 'chr1||153711': 100.0,\n",
       " 'chr1||153725': 91.5,\n",
       " 'chr1||153727': 100.0,\n",
       " 'chr1||153739': 100.0,\n",
       " 'chr1||153787': 100.0,\n",
       " 'chr1||153808': 100.0,\n",
       " 'chr1||153822': 100.0,\n",
       " 'chr1||153824': 100.0,\n",
       " 'chr1||153836': 100.0,\n",
       " 'chr1||153872': 100.0,\n",
       " 'chr1||154019': 100.0,\n",
       " 'chr1||154022': 100.0,\n",
       " 'chr1||154163': 31.5,\n",
       " 'chr1||154178': 80.0,\n",
       " 'chr1||154239': 100.0,\n",
       " 'chr1||154273': 100.0,\n",
       " 'chr1||154276': 100.0,\n",
       " 'chr1||154280': 100.0,\n",
       " 'chr1||154283': 100.0,\n",
       " 'chr1||154301': 100.0,\n",
       " 'chr1||154316': 100.0,\n",
       " 'chr1||154320': 100.0,\n",
       " 'chr1||154350': 94.5,\n",
       " 'chr1||154955': 100.0,\n",
       " 'chr1||154971': 91.5,\n",
       " 'chr1||155047': 100.0,\n",
       " 'chr1||155112': 63.0,\n",
       " 'chr1||156427': 90.5,\n",
       " 'chr1||156470': 100.0,\n",
       " 'chr1||156522': 100.0,\n",
       " 'chr1||156530': 100.0,\n",
       " 'chr1||156564': 100.0,\n",
       " 'chr1||156655': 100.0,\n",
       " 'chr1||156656': 100.0,\n",
       " 'chr1||156679': 90.0,\n",
       " 'chr1||156680': 100.0,\n",
       " 'chr1||156685': 90.0,\n",
       " 'chr1||156686': 100.0,\n",
       " 'chr1||156714': 100.0,\n",
       " 'chr1||156763': 84.5,\n",
       " 'chr1||156844': 86.0,\n",
       " 'chr1||157252': 92.5,\n",
       " 'chr1||157253': 100.0,\n",
       " 'chr1||157261': 79.0,\n",
       " 'chr1||157262': 87.0,\n",
       " 'chr1||157414': 100.0,\n",
       " 'chr1||158277': 81.5,\n",
       " 'chr1||159339': 65.0,\n",
       " 'chr1||159340': 75.5,\n",
       " 'chr1||159416': 100.0,\n",
       " 'chr1||159417': 100.0,\n",
       " 'chr1||159452': 94.0,\n",
       " 'chr1||159453': 100.0,\n",
       " 'chr1||159556': 100.0,\n",
       " 'chr1||159557': 91.0,\n",
       " 'chr1||159605': 100.0,\n",
       " 'chr1||159606': 100.0,\n",
       " 'chr1||159615': 100.0,\n",
       " 'chr1||159616': 100.0,\n",
       " 'chr1||159620': 100.0,\n",
       " 'chr1||159621': 94.5,\n",
       " 'chr1||159710': 86.0,\n",
       " 'chr1||159711': 90.0,\n",
       " 'chr1||159836': 86.0,\n",
       " 'chr1||159837': 84.0,\n",
       " 'chr1||159879': 100.0,\n",
       " 'chr1||159880': 100.0,\n",
       " 'chr1||159931': 80.0,\n",
       " 'chr1||159932': 100.0,\n",
       " 'chr1||159955': 85.5,\n",
       " 'chr1||159956': 100.0,\n",
       " 'chr1||159972': 94.0,\n",
       " 'chr1||159973': 94.5,\n",
       " 'chr1||159993': 71.5,\n",
       " 'chr1||159994': 81.0,\n",
       " 'chr1||160026': 100.0,\n",
       " 'chr1||160064': 87.5,\n",
       " 'chr1||160088': 78.5,\n",
       " 'chr1||160122': 100.0,\n",
       " 'chr1||160123': 100.0,\n",
       " 'chr1||160171': 49.5,\n",
       " 'chr1||160172': 42.5,\n",
       " 'chr1||160252': 100.0,\n",
       " 'chr1||160253': 100.0,\n",
       " 'chr1||160354': 100.0,\n",
       " 'chr1||160355': 100.0,\n",
       " 'chr1||160660': 100.0,\n",
       " 'chr1||160661': 95.0,\n",
       " 'chr1||160669': 83.5,\n",
       " 'chr1||160670': 94.5,\n",
       " 'chr1||160752': 100.0,\n",
       " 'chr1||160753': 100.0,\n",
       " 'chr1||160759': 93.5,\n",
       " 'chr1||160760': 93.0,\n",
       " 'chr1||161669': 83.5,\n",
       " 'chr1||161670': 100.0,\n",
       " 'chr1||161721': 100.0,\n",
       " 'chr1||161722': 100.0,\n",
       " 'chr1||161860': 100.0,\n",
       " 'chr1||161861': 100.0,\n",
       " 'chr1||161863': 94.0,\n",
       " 'chr1||161864': 100.0,\n",
       " 'chr1||161931': 66.5,\n",
       " 'chr1||161932': 71.5,\n",
       " 'chr1||161936': 76.5,\n",
       " 'chr1||161937': 73.0,\n",
       " 'chr1||161959': 72.0,\n",
       " 'chr1||161960': 72.0,\n",
       " 'chr1||161965': 72.0,\n",
       " 'chr1||161966': 71.0,\n",
       " 'chr1||162019': 77.0,\n",
       " 'chr1||162020': 72.5,\n",
       " 'chr1||162043': 100.0,\n",
       " 'chr1||162044': 100.0,\n",
       " 'chr1||162079': 100.0,\n",
       " 'chr1||162223': 93.5,\n",
       " 'chr1||162238': 100.0,\n",
       " 'chr1||162360': 100.0,\n",
       " 'chr1||162745': 36.5,\n",
       " 'chr1||162822': 84.0,\n",
       " 'chr1||162823': 80.0,\n",
       " 'chr1||162962': 50.5,\n",
       " 'chr1||162963': 74.0,\n",
       " 'chr1||163011': 100.0,\n",
       " 'chr1||163012': 94.0,\n",
       " 'chr1||163021': 92.5,\n",
       " 'chr1||163022': 100.0,\n",
       " 'chr1||163026': 88.0,\n",
       " 'chr1||163027': 100.0,\n",
       " 'chr1||163099': 100.0,\n",
       " 'chr1||163100': 100.0,\n",
       " 'chr1||163116': 82.5,\n",
       " 'chr1||163117': 100.0,\n",
       " 'chr1||163242': 65.0,\n",
       " 'chr1||163243': 90.0,\n",
       " 'chr1||163285': 81.0,\n",
       " 'chr1||163286': 100.0,\n",
       " 'chr1||163337': 89.5,\n",
       " 'chr1||163338': 85.0,\n",
       " 'chr1||163361': 100.0,\n",
       " 'chr1||163362': 94.0,\n",
       " 'chr1||163378': 94.0,\n",
       " 'chr1||163379': 100.0,\n",
       " 'chr1||163399': 92.5,\n",
       " 'chr1||163400': 93.0,\n",
       " 'chr1||163432': 61.5,\n",
       " 'chr1||163470': 77.5,\n",
       " 'chr1||163494': 100.0,\n",
       " 'chr1||163500': 100.0,\n",
       " 'chr1||164067': 91.0,\n",
       " 'chr1||164068': 85.0,\n",
       " 'chr1||164076': 86.5,\n",
       " 'chr1||164077': 70.0,\n",
       " 'chr1||164159': 92.0,\n",
       " 'chr1||164160': 93.0,\n",
       " 'chr1||165248': 100.0,\n",
       " 'chr1||165272': 100.0,\n",
       " 'chr1||165275': 100.0,\n",
       " 'chr1||165343': 52.0,\n",
       " 'chr1||165371': 81.0,\n",
       " 'chr1||165377': 67.5,\n",
       " 'chr1||165431': 68.5,\n",
       " 'chr1||165454': 90.0,\n",
       " 'chr1||165455': 100.0,\n",
       " 'chr1||165490': 100.0,\n",
       " 'chr1||165491': 93.0,\n",
       " 'chr1||165639': 100.0,\n",
       " 'chr1||165640': 94.0,\n",
       " 'chr1||165655': 91.5,\n",
       " 'chr1||166659': 93.0,\n",
       " 'chr1||166702': 89.0,\n",
       " 'chr1||166754': 94.0,\n",
       " 'chr1||166778': 87.5,\n",
       " 'chr1||166795': 95.0,\n",
       " 'chr1||166816': 80.0,\n",
       " 'chr1||167284': 100.0,\n",
       " 'chr1||167285': 89.0,\n",
       " 'chr1||167317': 100.0,\n",
       " 'chr1||167318': 100.0,\n",
       " 'chr1||167320': 100.0,\n",
       " 'chr1||167321': 100.0,\n",
       " 'chr1||167441': 100.0,\n",
       " 'chr1||170555': 100.0,\n",
       " 'chr1||170640': 93.0,\n",
       " 'chr1||170648': 100.0,\n",
       " 'chr1||170690': 100.0,\n",
       " 'chr1||171523': 87.5,\n",
       " 'chr1||171665': 100.0,\n",
       " 'chr1||178984': 90.0,\n",
       " 'chr1||178985': 100.0,\n",
       " 'chr1||179472': 87.0,\n",
       " 'chr1||179951': 100.0,\n",
       " 'chr1||181125': 84.0,\n",
       " 'chr1||181148': 94.0,\n",
       " 'chr1||181149': 100.0,\n",
       " 'chr1||181169': 93.0,\n",
       " 'chr1||181170': 100.0,\n",
       " 'chr1||181286': 69.0,\n",
       " 'chr1||181287': 74.0,\n",
       " 'chr1||181415': 94.0,\n",
       " 'chr1||181416': 93.0,\n",
       " 'chr1||181426': 100.0,\n",
       " 'chr1||181427': 100.0,\n",
       " 'chr1||181477': 90.0,\n",
       " 'chr1||181482': 90.0,\n",
       " 'chr1||181514': 81.5,\n",
       " 'chr1||181622': 84.0,\n",
       " 'chr1||181723': 87.5,\n",
       " 'chr1||181774': 100.0,\n",
       " 'chr1||181791': 100.0,\n",
       " 'chr1||181801': 100.0,\n",
       " 'chr1||182752': 100.0,\n",
       " 'chr1||182753': 100.0,\n",
       " 'chr1||183430': 86.5,\n",
       " 'chr1||183470': 86.0,\n",
       " 'chr1||184214': 83.5,\n",
       " 'chr1||184277': 100.0,\n",
       " 'chr1||184278': 100.0,\n",
       " 'chr1||184285': 100.0,\n",
       " 'chr1||184286': 93.0,\n",
       " 'chr1||184393': 90.0,\n",
       " 'chr1||184619': 94.0,\n",
       " 'chr1||184620': 100.0,\n",
       " 'chr1||184739': 86.5,\n",
       " 'chr1||184740': 100.0,\n",
       " 'chr1||184814': 100.0,\n",
       " 'chr1||184815': 100.0,\n",
       " 'chr1||187010': 85.5,\n",
       " 'chr1||187078': 100.0,\n",
       " 'chr1||188760': 100.0,\n",
       " 'chr1||190184': 85.5,\n",
       " 'chr1||190185': 81.0,\n",
       " 'chr1||190276': 100.0,\n",
       " 'chr1||190277': 91.5,\n",
       " 'chr1||191102': 100.0,\n",
       " 'chr1||191121': 100.0,\n",
       " 'chr1||191122': 100.0,\n",
       " 'chr1||191231': 100.0,\n",
       " 'chr1||191232': 100.0,\n",
       " 'chr1||191405': 90.0,\n",
       " 'chr1||191758': 100.0,\n",
       " 'chr1||191780': 94.0,\n",
       " 'chr1||191969': 100.0,\n",
       " 'chr1||191970': 100.0,\n",
       " 'chr1||192048': 100.0,\n",
       " 'chr1||192049': 100.0,\n",
       " 'chr1||192138': 90.0,\n",
       " 'chr1||192139': 84.5,\n",
       " 'chr1||192250': 100.0,\n",
       " 'chr1||192251': 100.0,\n",
       " 'chr1||192392': 89.0,\n",
       " 'chr1||193015': 100.0,\n",
       " 'chr1||193016': 100.0,\n",
       " 'chr1||193083': 100.0,\n",
       " 'chr1||193145': 100.0,\n",
       " 'chr1||193203': 93.0,\n",
       " 'chr1||193204': 93.0,\n",
       " 'chr1||193271': 94.0,\n",
       " 'chr1||193272': 100.0,\n",
       " 'chr1||193410': 80.0,\n",
       " 'chr1||193474': 100.0,\n",
       " 'chr1||193970': 100.0,\n",
       " 'chr1||195209': 72.0,\n",
       " 'chr1||195399': 100.0,\n",
       " 'chr1||195400': 100.0,\n",
       " 'chr1||195471': 100.0,\n",
       " 'chr1||195472': 100.0,\n",
       " 'chr1||195523': 100.0,\n",
       " 'chr1||195623': 91.5,\n",
       " 'chr1||195627': 83.5,\n",
       " 'chr1||195792': 100.0,\n",
       " 'chr1||196405': 63.5,\n",
       " 'chr1||196437': 75.0,\n",
       " 'chr1||196496': 94.0,\n",
       " 'chr1||196572': 100.0,\n",
       " 'chr1||196627': 100.0,\n",
       " 'chr1||196628': 94.5,\n",
       " 'chr1||196630': 100.0,\n",
       " 'chr1||196631': 94.5,\n",
       " 'chr1||196640': 100.0,\n",
       " 'chr1||196641': 94.0,\n",
       " 'chr1||196907': 91.0,\n",
       " 'chr1||196908': 100.0,\n",
       " 'chr1||196917': 91.0,\n",
       " 'chr1||196918': 85.5,\n",
       " 'chr1||197026': 100.0,\n",
       " 'chr1||197027': 100.0,\n",
       " 'chr1||197037': 100.0,\n",
       " 'chr1||197038': 91.5,\n",
       " 'chr1||197063': 95.0,\n",
       " 'chr1||197064': 100.0,\n",
       " 'chr1||197821': 87.5,\n",
       " 'chr1||197924': 76.0,\n",
       " 'chr1||197933': 0.0,\n",
       " 'chr1||197955': 87.5,\n",
       " 'chr1||197980': 95.0,\n",
       " 'chr1||198093': 91.0,\n",
       " 'chr1||198138': 93.0,\n",
       " 'chr1||198139': 93.0,\n",
       " 'chr1||198148': 93.0,\n",
       " 'chr1||198149': 89.5,\n",
       " 'chr1||198193': 100.0,\n",
       " 'chr1||198194': 100.0,\n",
       " 'chr1||198251': 100.0,\n",
       " 'chr1||198252': 100.0,\n",
       " 'chr1||198273': 100.0,\n",
       " 'chr1||198274': 100.0,\n",
       " 'chr1||198367': 86.0,\n",
       " 'chr1||198368': 85.5,\n",
       " 'chr1||198448': 71.5,\n",
       " 'chr1||198487': 100.0,\n",
       " 'chr1||198504': 100.0,\n",
       " 'chr1||198674': 78.0,\n",
       " 'chr1||198814': 54.0,\n",
       " 'chr1||198975': 44.5,\n",
       " 'chr1||198976': 100.0,\n",
       " 'chr1||199042': 82.0,\n",
       " 'chr1||199074': 100.0,\n",
       " 'chr1||199082': 100.0,\n",
       " 'chr1||199282': 100.0,\n",
       " 'chr1||199826': 83.5,\n",
       " 'chr1||199829': 78.5,\n",
       " 'chr1||199830': 93.0,\n",
       " 'chr1||199905': 87.5,\n",
       " 'chr1||199906': 100.0,\n",
       " 'chr1||199974': 94.0,\n",
       " 'chr1||199975': 100.0,\n",
       " 'chr1||200053': 68.5,\n",
       " 'chr1||200054': 100.0,\n",
       " 'chr1||200077': 72.5,\n",
       " 'chr1||200078': 100.0,\n",
       " 'chr1||200112': 87.5,\n",
       " 'chr1||200113': 92.0,\n",
       " 'chr1||200353': 100.0,\n",
       " 'chr1||201171': 100.0,\n",
       " 'chr1||201199': 100.0,\n",
       " 'chr1||201200': 91.5,\n",
       " 'chr1||201253': 100.0,\n",
       " 'chr1||201254': 100.0,\n",
       " 'chr1||201255': 100.0,\n",
       " 'chr1||201256': 100.0,\n",
       " 'chr1||201346': 86.0,\n",
       " 'chr1||201347': 94.0,\n",
       " 'chr1||201374': 100.0,\n",
       " 'chr1||201375': 92.5,\n",
       " 'chr1||201510': 67.5,\n",
       " 'chr1||201511': 74.5,\n",
       " 'chr1||201573': 55.5,\n",
       " 'chr1||201574': 78.0,\n",
       " 'chr1||201636': 52.5,\n",
       " 'chr1||201692': 46.5,\n",
       " 'chr1||201731': 51.5,\n",
       " 'chr1||201739': 39.5,\n",
       " 'chr1||201776': 51.0,\n",
       " 'chr1||201786': 48.0,\n",
       " 'chr1||202001': 52.0,\n",
       " 'chr1||202115': 89.5,\n",
       " 'chr1||202143': 60.0,\n",
       " 'chr1||202175': 100.0,\n",
       " 'chr1||202248': 100.0,\n",
       " 'chr1||202250': 100.0,\n",
       " 'chr1||202252': 100.0,\n",
       " 'chr1||202272': 93.0,\n",
       " 'chr1||202294': 100.0,\n",
       " 'chr1||202304': 100.0,\n",
       " 'chr1||202328': 100.0,\n",
       " 'chr1||202593': 100.0,\n",
       " 'chr1||202687': 100.0,\n",
       " 'chr1||202749': 77.0,\n",
       " 'chr1||202859': 100.0,\n",
       " 'chr1||202860': 100.0,\n",
       " 'chr1||202861': 100.0,\n",
       " 'chr1||202862': 100.0,\n",
       " 'chr1||202904': 100.0,\n",
       " 'chr1||202905': 85.5,\n",
       " 'chr1||202920': 100.0,\n",
       " 'chr1||202921': 87.0,\n",
       " 'chr1||203087': 100.0,\n",
       " 'chr1||203383': 94.0,\n",
       " 'chr1||203384': 94.0,\n",
       " 'chr1||203588': 100.0,\n",
       " 'chr1||203602': 100.0,\n",
       " 'chr1||203603': 100.0,\n",
       " 'chr1||203701': 94.0,\n",
       " 'chr1||203702': 100.0,\n",
       " 'chr1||203741': 100.0,\n",
       " 'chr1||203742': 100.0,\n",
       " 'chr1||203895': 93.0,\n",
       " 'chr1||203896': 100.0,\n",
       " 'chr1||204143': 100.0,\n",
       " 'chr1||204144': 100.0,\n",
       " 'chr1||204151': 100.0,\n",
       " 'chr1||204152': 100.0,\n",
       " 'chr1||204205': 100.0,\n",
       " 'chr1||204206': 100.0,\n",
       " 'chr1||204261': 100.0,\n",
       " 'chr1||204262': 100.0,\n",
       " 'chr1||204269': 90.5,\n",
       " 'chr1||204270': 100.0,\n",
       " 'chr1||204356': 100.0,\n",
       " 'chr1||204357': 100.0,\n",
       " 'chr1||204361': 100.0,\n",
       " 'chr1||204362': 100.0,\n",
       " 'chr1||204408': 67.5,\n",
       " 'chr1||204409': 82.0,\n",
       " 'chr1||204449': 100.0,\n",
       " 'chr1||204450': 100.0,\n",
       " 'chr1||204461': 100.0,\n",
       " 'chr1||204462': 100.0,\n",
       " 'chr1||204473': 91.5,\n",
       " 'chr1||204474': 94.5,\n",
       " 'chr1||204521': 93.5,\n",
       " 'chr1||204522': 100.0,\n",
       " 'chr1||204532': 94.0,\n",
       " 'chr1||204533': 100.0,\n",
       " 'chr1||204544': 61.5,\n",
       " 'chr1||204545': 83.5,\n",
       " 'chr1||204556': 23.5,\n",
       " 'chr1||204557': 44.0,\n",
       " 'chr1||204603': 90.0,\n",
       " 'chr1||204604': 100.0,\n",
       " 'chr1||204605': 90.0,\n",
       " 'chr1||204606': 90.5,\n",
       " 'chr1||204609': 68.5,\n",
       " 'chr1||204610': 93.5,\n",
       " 'chr1||204637': 76.0,\n",
       " 'chr1||204638': 81.5,\n",
       " 'chr1||204645': 100.0,\n",
       " 'chr1||204646': 100.0,\n",
       " 'chr1||204668': 93.5,\n",
       " 'chr1||204669': 95.0,\n",
       " 'chr1||204699': 9.5,\n",
       " 'chr1||204700': 9.0,\n",
       " 'chr1||204704': 3.5,\n",
       " 'chr1||204705': 9.5,\n",
       " 'chr1||204828': 20.5,\n",
       " 'chr1||204829': 22.0,\n",
       " 'chr1||204929': 0.0,\n",
       " 'chr1||204930': 0.0,\n",
       " 'chr1||204969': 0.0,\n",
       " 'chr1||204970': 0.0,\n",
       " 'chr1||204974': 0.0,\n",
       " 'chr1||204975': 0.0,\n",
       " 'chr1||204978': 0.0,\n",
       " 'chr1||204979': 0.0,\n",
       " 'chr1||204980': 0.0,\n",
       " 'chr1||204981': 0.0,\n",
       " 'chr1||205057': 0.0,\n",
       " 'chr1||205058': 0.0,\n",
       " 'chr1||205077': 0.0,\n",
       " 'chr1||205078': 0.0,\n",
       " 'chr1||205141': 0.0,\n",
       " 'chr1||205142': 0.0,\n",
       " 'chr1||205158': 0.0,\n",
       " 'chr1||205159': 0.0,\n",
       " 'chr1||205166': 0.0,\n",
       " 'chr1||205167': 0.0,\n",
       " 'chr1||205168': 0.0,\n",
       " 'chr1||205169': 0.0,\n",
       " 'chr1||205177': 0.0,\n",
       " 'chr1||205178': 0.0,\n",
       " 'chr1||205180': 0.0,\n",
       " 'chr1||205181': 0.0,\n",
       " 'chr1||205186': 0.0,\n",
       " 'chr1||205187': 0.0,\n",
       " 'chr1||205205': 0.0,\n",
       " 'chr1||205206': 0.0,\n",
       " 'chr1||205208': 0.0,\n",
       " 'chr1||205209': 0.0,\n",
       " 'chr1||205216': 0.0,\n",
       " 'chr1||205217': 0.0,\n",
       " 'chr1||205227': 0.0,\n",
       " 'chr1||205228': 0.0,\n",
       " 'chr1||205232': 0.0,\n",
       " 'chr1||205233': 0.0,\n",
       " 'chr1||205239': 0.0,\n",
       " 'chr1||205240': 0.0,\n",
       " 'chr1||205252': 0.0,\n",
       " 'chr1||205253': 4.0,\n",
       " 'chr1||205274': 0.0,\n",
       " 'chr1||205275': 0.0,\n",
       " 'chr1||205293': 0.0,\n",
       " 'chr1||205294': 0.0,\n",
       " 'chr1||205295': 0.0,\n",
       " 'chr1||205296': 0.0,\n",
       " 'chr1||205323': 0.0,\n",
       " 'chr1||205324': 0.0,\n",
       " 'chr1||205325': 0.0,\n",
       " 'chr1||205326': 0.0,\n",
       " 'chr1||205333': 0.0,\n",
       " 'chr1||205334': 0.0,\n",
       " 'chr1||205337': 0.0,\n",
       " 'chr1||205338': 0.0,\n",
       " 'chr1||205354': 0.0,\n",
       " 'chr1||205355': 3.0,\n",
       " 'chr1||205358': 0.0,\n",
       " 'chr1||205359': 0.0,\n",
       " 'chr1||205369': 0.0,\n",
       " 'chr1||205370': 0.0,\n",
       " 'chr1||205395': 0.0,\n",
       " 'chr1||205396': 0.0,\n",
       " 'chr1||205410': 0.0,\n",
       " 'chr1||205411': 0.0,\n",
       " 'chr1||205414': 0.0,\n",
       " 'chr1||205415': 0.0,\n",
       " 'chr1||205417': 0.0,\n",
       " 'chr1||205418': 0.0,\n",
       " 'chr1||205420': 0.0,\n",
       " 'chr1||205421': 0.0,\n",
       " 'chr1||205433': 0.0,\n",
       " 'chr1||205434': 0.0,\n",
       " 'chr1||205448': 0.0,\n",
       " 'chr1||205449': 0.0,\n",
       " 'chr1||205456': 0.0,\n",
       " 'chr1||205457': 0.0,\n",
       " 'chr1||205463': 0.0,\n",
       " 'chr1||205464': 0.0,\n",
       " 'chr1||205466': 0.0,\n",
       " 'chr1||205467': 0.0,\n",
       " 'chr1||205469': 0.0,\n",
       " 'chr1||205470': 0.0,\n",
       " 'chr1||205473': 0.0,\n",
       " 'chr1||205474': 0.0,\n",
       " 'chr1||205494': 0.0,\n",
       " 'chr1||205495': 0.0,\n",
       " 'chr1||205504': 0.0,\n",
       " 'chr1||205505': 0.0,\n",
       " 'chr1||205509': 0.0,\n",
       " 'chr1||205510': 0.0,\n",
       " 'chr1||205511': 0.0,\n",
       " 'chr1||205512': 0.0,\n",
       " 'chr1||205518': 0.0,\n",
       " 'chr1||205519': 0.0,\n",
       " 'chr1||205522': 0.0,\n",
       " 'chr1||205523': 0.0,\n",
       " 'chr1||205532': 0.0,\n",
       " 'chr1||205533': 0.0,\n",
       " 'chr1||205547': 0.0,\n",
       " 'chr1||205548': 0.0,\n",
       " 'chr1||205551': 0.0,\n",
       " 'chr1||205552': 0.0,\n",
       " 'chr1||205559': 0.0,\n",
       " 'chr1||205560': 0.0,\n",
       " 'chr1||205561': 0.0,\n",
       " 'chr1||205562': 0.0,\n",
       " 'chr1||205584': 0.0,\n",
       " 'chr1||205585': 0.0,\n",
       " 'chr1||205599': 0.0,\n",
       " ...}"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bisulfite_bed='/home/xiaoyifu/data/HG003/bisulfite/HG003EMSeqlabs_1_val_1_bismark_bt2_pe.lab12_cov5_mock.deduplicated.bismark.cov.CG.bed'\n",
    "read_bed(bisulfite_bed)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "def filter_n_and_update_indices(seq, pred_pos):\n",
    "    # Step 1: Initialize variables\n",
    "    n_count = 0\n",
    "    n_positions = []\n",
    "    \n",
    "    # Step 2: Traverse the sequence to identify 'N' positions and count them\n",
    "    for i, char in enumerate(seq):\n",
    "        if char == 'N':\n",
    "            n_positions.append(i)\n",
    "    \n",
    "    # Calculate number of 'N's to the left of each position in pred_pos\n",
    "    updated_pred_pos = []\n",
    "    \n",
    "    for pos in pred_pos:\n",
    "        # Calculate the number of 'N's before the current position\n",
    "        count_n_before_pos = sum(1 for n_pos in n_positions if n_pos < pos)\n",
    "        # Adjust the position by subtracting the number of 'N's before it\n",
    "        updated_pos = pos - count_n_before_pos\n",
    "        updated_pred_pos.append(updated_pos)\n",
    "    \n",
    "    # Step 3: Filter 'N' from the sequence\n",
    "    filtered_seq = ''.join([char for char in seq if char != 'N'])\n",
    "    return filtered_seq, updated_pred_pos, len(n_positions)\n",
    "\n",
    "def align_signals_and_extend_ref_seq(pos_pair, read_signal, read_seq, ref_seq,motif_seqs,methyloc,strand,ref_start,ref_end):\n",
    "    # 确定最前端和最末端有效的比对索引\n",
    "    first_valid_index = next((i for i, (_, ref_pos) in enumerate(pos_pair) if ref_pos is not None), len(pos_pair))\n",
    "    last_valid_index = len(pos_pair) - 1 - next((i for i, (_, ref_pos) in enumerate(reversed(pos_pair)) if ref_pos is not None), len(pos_pair))\n",
    "\n",
    "    # 生成新的 ref_seq 和 ref_signal\n",
    "    new_ref_seq = []\n",
    "    new_ref_signal = []\n",
    "\n",
    "    # 填充前端未比对的部分\n",
    "    for i in range(first_valid_index):\n",
    "        read_pos, _ = pos_pair[i]\n",
    "        if read_pos is not None:\n",
    "            new_ref_seq.append(read_seq[read_pos])\n",
    "            new_ref_signal.append(read_signal[read_pos])\n",
    "\n",
    "    last_valid_ref_pos = len(new_ref_seq) - 1\n",
    "\n",
    "    # 处理中间的比对部分\n",
    "    for i in range(first_valid_index, last_valid_index + 1):\n",
    "        read_pos, ref_pos = pos_pair[i]\n",
    "\n",
    "        if ref_pos is not None:\n",
    "            # 确保 new_ref_seq 的长度足够\n",
    "            # while len(new_ref_seq) < ref_pos:\n",
    "            #     new_ref_seq.append(None)  # 用 None 占位，表示插入的部分\n",
    "            #     new_ref_signal.append([]) # 对应的信号也填充为空列表\n",
    "            \n",
    "            new_ref_seq.append(ref_seq[ref_pos])\n",
    "            new_ref_signal.append(read_signal[read_pos] if read_pos is not None else [])\n",
    "            last_valid_ref_pos = len(new_ref_seq) - 1\n",
    "\n",
    "        elif ref_pos is None:\n",
    "            if last_valid_ref_pos is not None:\n",
    "                new_ref_signal[last_valid_ref_pos].extend(read_signal[read_pos])\n",
    "\n",
    "    # 填充后端未比对的部分\n",
    "    for i in range(last_valid_index + 1, len(pos_pair)):\n",
    "        read_pos, _ = pos_pair[i]\n",
    "        if read_pos is not None:\n",
    "            new_ref_seq.append(read_seq[read_pos])\n",
    "            new_ref_signal.append(read_signal[read_pos])\n",
    "            \n",
    "\n",
    "    new_ref_seq = ''.join(base  for base in new_ref_seq)\n",
    "    ref_readlocs = dict()\n",
    "    ref_poss = []\n",
    "    pred_pos = []\n",
    "    ref_pos = -1\n",
    "    tsite_locs = get_refloc_of_methysite_in_motif(\n",
    "        new_ref_seq, set(motif_seqs), methyloc)\n",
    "    for loc_in_read in tsite_locs:\n",
    "        if loc_in_read<first_valid_index:\n",
    "            ref_pos = -1\n",
    "            ref_poss.append(ref_pos)\n",
    "            pred_pos.append(loc_in_read)\n",
    "            continue\n",
    "        if loc_in_read>last_valid_index:\n",
    "            ref_pos = -1\n",
    "            ref_poss.append(ref_pos)\n",
    "            pred_pos.append(loc_in_read)\n",
    "            continue\n",
    "        if strand == \"-\":\n",
    "            ref_pos = ref_end-loc_in_read-1+first_valid_index\n",
    "        else:\n",
    "            ref_pos = ref_start+loc_in_read-first_valid_index\n",
    "        #ref_readlocs[loc_in_read+first_valid_index] = ref_pos\n",
    "        ref_poss.append(ref_pos)\n",
    "        pred_pos.append(loc_in_read)\n",
    "    new_ref_seq, pred_pos,n_lens=filter_n_and_update_indices(new_ref_seq, pred_pos)\n",
    "    ref_readlocs = dict(zip(pred_pos, ref_poss))\n",
    "    return new_ref_seq, new_ref_signal, ref_readlocs, ref_poss, pred_pos, n_lens"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "pos_pair=[]\n",
    "for read_pos,ref_pos in bam_read.get_aligned_pairs():\n",
    "    # if ref_pos is None:\n",
    "    #     continue\n",
    "    if read_pos is None:\n",
    "        if bam_read.is_reverse:\n",
    "            pos_pair.append((None,ref_end-ref_pos-1))\n",
    "        else:\n",
    "            pos_pair.append((None,ref_pos-ref_start))\n",
    "        continue\n",
    "    if ref_pos is None:\n",
    "        if bam_read.is_reverse:\n",
    "            pos_pair.append((len(seq)-read_pos-1,None))\n",
    "        else:\n",
    "            pos_pair.append((read_pos,None))\n",
    "        continue\n",
    "    if bam_read.is_reverse:\n",
    "        pos_pair.append((len(seq)-read_pos-1,ref_end-ref_pos-1))\n",
    "    else:\n",
    "        pos_pair.append((read_pos,ref_pos-ref_start))\n",
    "#pos_pair=bam_read.get_aligned_pairs()\n",
    "#signal_group=align_signals(pos_pair,signal_group,ref_seq)\n",
    "\n",
    "if strand == \"-\":\n",
    "    pos_pair.reverse()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "import random\n",
    " \n",
    "def generate_random_lists(rows, cols):\n",
    "    random_lists = [[random.randint(0, 100) for _ in range(cols)] for _ in range(rows)]\n",
    "    return random_lists"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "from deepsignal3.utils.process_utils import get_motif_seqs\n",
    "\n",
    "motif_seqs= get_motif_seqs('CG', True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "signal_group=generate_random_lists(len(seq),1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "nseq,signal_group, ref_readlocs, ref_poss, pred_pos,n_lens=align_signals_and_extend_ref_seq(pos_pair, signal_group, seq, rseq_complement,motif_seqs,0,strand,ref_start,ref_end)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "TACTTCGTTCAGTTAGTAATTGCTGCTCCTCAGCTTGCAGATGGCCTATGGCCTTGGGATTTCACCTTGTGATCGTGTGAGTCAGTTCTCCTAATAAGCTCCCCTTCATATATACATCAATCCTATTAGTTCTGTCCCTTCAGGGAACCCTGACTAACACATCAAGTAATGTTACTTTTGGACGATTGGATACAATTCCATTATTCAGTGGCCTATTCAGTGGCCTTTGACTGTCATTGGTATCCAATATTGCTGTGTTAGTTGTTTTGATTATGATGGCATAGCAGAAATATATACAGTACATAGGGATGTAAAATATTAAAATATTTAGTCTAACCAAGGAAACATGAGATGTGGGTGAGAGCATATATTTATGACTCATTTTTCCTCATTTCCACTATTCCTTCCCTGAATTTTGTCTGTTTGGAAGAAATCAAATGGTTTTAAAGAAGCAAGTTGCCCTGGGCTCCTCCATCCTTTTGCATTCTTTAGACATTGTACCTTATTCTATGCATCTTATTGAAAATGGTTTTATAATCACTTATCACGTTTTGGTTTTAGTTCCTTCTTTAGCCCCACAAGTTTGTGTTTCCCTTGACTCTTTTTTTTTTTTTCTTAACTGAAAGTTTCTGAGCACTGAGCCAGTGTCAAGATTTACAGAATGTGGCTTTTATGATGGACTTCAAAAGTGGCTCCTTAGAATTGGATGAAGGCAAGTTTTCTGGGCATTGAGATTAATAGTCAACATTTTAATTCTTTGAGACCATTCTATTTCTGTTCTTTTCCGCTTGATGGACTTTACTATGAATGTTCTGAGATCACTTTTATAAAACTATGATATTATGCTTGCTGTCTCTGACGCTGAAATTCAGTGTTAGTTCTGTCTGTATCTATGATATTCTGCTTGCTGTCTCTGGCT\n",
      "919\n"
     ]
    }
   ],
   "source": [
    "print(seq)\n",
    "print(len(seq))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CAGAGACAGCAAGCAGAATATCATAGATACAGACAGAACTAACACTGAATTTCAGCGTCAGAGACAGCAAGCATAATATCATAGTTTTATAAAAGTGATCTCAGAACATTCATAGTAAAGTCCATCAAGCGGAAAAGAACAGAAcAGAATGGTCTCAAAGAATTAAAATGTTGACTATTAATCTCAATGCCCAGAAAAACTTGCCTTCATCCAATTCTAAGGAGCCACTTTTGAAGTCCATCATAAAAGCCACATTCTGTAAATCTTGACACTGGCTCAGTGCTCAGAAACTTTCAGTTAAGAAAAAAAAAAAAAGAGTCAAGGGAAACACAAAtTTGGCTAAAGAAGGAACTAAAACCAAAACGTGATAAGTGATTATAAAACCATTTTCAATAAGATGCATAGAATAAGGTACAATGTCTAAAGAATGCAAAAGGATGGAGGAGCCCAGGGCAACTTGCTTCTTTAAAACCATTTGATTTCTTCCAAACAGACAAAATTCAGGGAAGGAATAGTGGAAATGAGGAAAATGAGTCATAAATATATGCTCTCACCCACATCTCATGTTTCCTTGGTTAGACTAAATATTTTAATATTTACATCCCTATGTACTGTATATATATTTtTGCTATGCCATCATAATCAAAACAACTAACACAGCAATATTGGATACCAATGACAGTCAAAGGCCACTGAATAGGCCACTGAATAATGGAATTGTATCtAATCGTCCAAAAGTAACATTACTTGATGTGTTAGTCAGGGTTCCCTGAAGGGACAGAACTAATAGGATTGATGTATATATGAAGGGGAGCTTATTAGGAGAACTGACTCACACGATCACAAGGTGAAATCCCACaATAGGCCATCTGCAAGCTGAGGAGCA\n",
      "886\n",
      "TGCTCCTCAGCTTGCAGATGGCCTATNGTGGGATTTCACCTTGTGATCGTGTGAGTCAGTTCTCCTAATAAGCTCCCCTTCATATATACATCAATCCTATTAGTTCTGTCCCTTCAGGGAACCCTGACTAACACATCAAGTAATGTTACTTTTGGACGATTNGATACAATTCCATTATTCAGTGGCCTATTCAGTGGCCTTTGACTGTCATTGGTATCCAATATTGCTGTGTTAGTTGTTTTGATTATGATGGCATAGCANAAATATATATACAGTACATAGGGATGTAAATATTAAAATATTTAGTCTAACCAAGGAAACATGAGATGTGGGTGAGAGCATATATTTATGACTCATTTTCCTCATTTCCACTATTCCTTCCCTGAATTTTGTCTGTTTGGAAGAAATCAAATGGTTTTAAAGAAGCAAGTTGCCCTGGGCTCCTCCATCCTTTTGCATTCTTTAGACATTGTACCTTATTCTATGCATCTTATTGAAAATGGTTTTATAATCACTTATCACGTTTTGGTTTTAGTTCCTTCTTTAGCCAANTTTGTGTTTCCCTTGACTCTTTTTTTTTTTTTCTTAACTGAAAGTTTCTGAGCACTGAGCCAGTGTCAAGATTTACAGAATGTGGCTTTTATGATGGACTTCAAAAGTGGCTCCTTAGAATTGGATGAAGGCAAGTTTTTCTGGGCATTGAGATTAATAGTCAACATTTTAATTCTTTGAGACCATTCTNTTCTGTTCTTTTCCGCTTGATGGACTTTACTATGAATGTTCTGAGATCACTTTTATAAAACTATGATATTATGCTTGCTGTCTCTGACGCTGAAATTCAGTGTTAGTTCTGTCTGTATCTATGATATTCTGCTTGCTGTCTCTG\n",
      "886\n"
     ]
    }
   ],
   "source": [
    "print(rseq)\n",
    "print(len(rseq))\n",
    "print(rseq_complement)\n",
    "print(len(rseq_complement))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "TACTTCGTTCAGTTAGTAATTGCTGCTCCTCAGCTTGCAGATGGCCTATGTGGGATTTCACCTTGTGATCGTGTGAGTCAGTTCTCCTAATAAGCTCCCCTTCATATATACATCAATCCTATTAGTTCTGTCCCTTCAGGGAACCCTGACTAACACATCAAGTAATGTTACTTTTGGACGATTGATACAATTCCATTATTCAGTGGCCTATTCAGTGGCCTTTGACTGTCATTGGTATCCAATATTGCTGTGTTAGTTGTTTTGATTATGATGGCATAGCAAAATATATATACAGTACATAGGGATGTAAATATTAAAATATTTAGTCTAACCAAGGAAACATGAGATGTGGGTGAGAGCATATATTTATGACTCATTTTCCTCATTTCCACTATTCCTTCCCTGAATTTTGTCTGTTTGGAAGAAATCAAATGGTTTTAAAGAAGCAAGTTGCCCTGGGCTCCTCCATCCTTTTGCATTCTTTAGACATTGTACCTTATTCTATGCATCTTATTGAAAATGGTTTTATAATCACTTATCACGTTTTGGTTTTAGTTCCTTCTTTAGCCAATTTGTGTTTCCCTTGACTCTTTTTTTTTTTTTCTTAACTGAAAGTTTCTGAGCACTGAGCCAGTGTCAAGATTTACAGAATGTGGCTTTTATGATGGACTTCAAAAGTGGCTCCTTAGAATTGGATGAAGGCAAGTTTTTCTGGGCATTGAGATTAATAGTCAACATTTTAATTCTTTGAGACCATTCTTTCTGTTCTTTTCCGCTTGATGGACTTTACTATGAATGTTCTGAGATCACTTTTATAAAACTATGATATTATGCTTGCTGTCTCTGACGCTGAAATTCAGTGTTAGTTCTGTCTGTATCTATGATATTCTGCTTGCTGTCTCTGGCT\n",
      "907\n",
      "5\n",
      "[5, 69, 178, 541, 773, 847]\n",
      "[-1, 163519605, 163519496, 163519131, 163518897, 163518823]\n"
     ]
    }
   ],
   "source": [
    "print(nseq)\n",
    "print(len(nseq))\n",
    "print(n_lens)\n",
    "print(pred_pos)\n",
    "print(ref_poss)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CG\n",
      "CG\n",
      "CG\n",
      "CG\n",
      "CG\n",
      "CG\n"
     ]
    }
   ],
   "source": [
    "for i in pred_pos:\n",
    "    print(nseq[i:i+2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "GA\n",
      "\n",
      "GT\n",
      "\n",
      "GT\n",
      "\n",
      "GG\n",
      "\n",
      "GT\n",
      "\n"
     ]
    }
   ],
   "source": [
    "for i in ref_poss:\n",
    "    if i==-1:\n",
    "        continue\n",
    "    print(rseq[i-ref_end:i+2-ref_end],end='')\n",
    "    print('\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "GA\n",
      "\n",
      "GT\n",
      "\n",
      "GT\n",
      "\n",
      "GG\n",
      "\n",
      "GT\n",
      "\n"
     ]
    }
   ],
   "source": [
    "for i in ref_poss:\n",
    "    if i==-1:\n",
    "        continue\n",
    "    print(rseq[i-ref_start:i+2-ref_start],end='')\n",
    "    print('\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CG\n",
      "\n",
      "CG\n",
      "\n",
      "CG\n",
      "\n",
      "CG\n",
      "\n",
      "CG\n",
      "\n"
     ]
    }
   ],
   "source": [
    "for i in ref_poss:\n",
    "    if i==-1:\n",
    "        continue\n",
    "    print(rseq_complement[ref_end-i-1:ref_end-i+1],end='')\n",
    "    print('\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "TCGT\n",
      "\n",
      "ACGA\n",
      "\n",
      "ACGT\n",
      "\n",
      "CCGC\n",
      "\n",
      "ACGC\n",
      "\n"
     ]
    }
   ],
   "source": [
    "for i in ref_poss:\n",
    "    if i==-1:\n",
    "        continue\n",
    "    print(rseq_complement[ref_start-i-2:ref_start-i+2],end='')\n",
    "    print('\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "CG\n",
      "\n",
      "CG\n",
      "\n",
      "CG\n",
      "\n",
      "CG\n",
      "\n",
      "CG\n",
      "\n"
     ]
    }
   ],
   "source": [
    "for i in ref_poss:\n",
    "    print(contigs[reference_name][i:i+2],end='')\n",
    "    print('\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "contigs_complement=complement_seq(contigs[reference_name])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CG\n",
      "\n",
      "CG\n",
      "\n",
      "CG\n",
      "\n",
      "CG\n",
      "\n",
      "CG\n",
      "\n"
     ]
    }
   ],
   "source": [
    "for i in ref_poss:\n",
    "    if i==-1:\n",
    "        continue\n",
    "    print(contigs_complement[-i-2:-i],end='')\n",
    "    print('\\n')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "deepsignalpenv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.17"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
